1 /* rx.c --- opcode semantics for stand-alone RX simulator.
2 
3 Copyright (C) 2008-2014 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
5 
6 This file is part of the GNU simulators.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <signal.h>
26 
27 #include "opcode/rx.h"
28 #include "cpu.h"
29 #include "mem.h"
30 #include "syscalls.h"
31 #include "fpu.h"
32 #include "err.h"
33 #include "misc.h"
34 
35 #ifdef CYCLE_STATS
36 static const char * id_names[] = {
37   "RXO_unknown",
38   "RXO_mov",	/* d = s (signed) */
39   "RXO_movbi",	/* d = [s,s2] (signed) */
40   "RXO_movbir",	/* [s,s2] = d (signed) */
41   "RXO_pushm",	/* s..s2 */
42   "RXO_popm",	/* s..s2 */
43   "RXO_xchg",	/* s <-> d */
44   "RXO_stcc",	/* d = s if cond(s2) */
45   "RXO_rtsd",	/* rtsd, 1=imm, 2-0 = reg if reg type */
46 
47   /* These are all either d OP= s or, if s2 is set, d = s OP s2.  Note
48      that d may be "None".  */
49   "RXO_and",
50   "RXO_or",
51   "RXO_xor",
52   "RXO_add",
53   "RXO_sub",
54   "RXO_mul",
55   "RXO_div",
56   "RXO_divu",
57   "RXO_shll",
58   "RXO_shar",
59   "RXO_shlr",
60 
61   "RXO_adc",	/* d = d + s + carry */
62   "RXO_sbb",	/* d = d - s - ~carry */
63   "RXO_abs",	/* d = |s| */
64   "RXO_max",	/* d = max(d,s) */
65   "RXO_min",	/* d = min(d,s) */
66   "RXO_emul",	/* d:64 = d:32 * s */
67   "RXO_emulu",	/* d:64 = d:32 * s (unsigned) */
68 
69   "RXO_rolc",	/* d <<= 1 through carry */
70   "RXO_rorc",	/* d >>= 1 through carry*/
71   "RXO_rotl",	/* d <<= #s without carry */
72   "RXO_rotr",	/* d >>= #s without carry*/
73   "RXO_revw",	/* d = revw(s) */
74   "RXO_revl",	/* d = revl(s) */
75   "RXO_branch",	/* pc = d if cond(s) */
76   "RXO_branchrel",/* pc += d if cond(s) */
77   "RXO_jsr",	/* pc = d */
78   "RXO_jsrrel",	/* pc += d */
79   "RXO_rts",
80   "RXO_nop",
81   "RXO_nop2",
82   "RXO_nop3",
83 
84   "RXO_scmpu",
85   "RXO_smovu",
86   "RXO_smovb",
87   "RXO_suntil",
88   "RXO_swhile",
89   "RXO_smovf",
90   "RXO_sstr",
91 
92   "RXO_rmpa",
93   "RXO_mulhi",
94   "RXO_mullo",
95   "RXO_machi",
96   "RXO_maclo",
97   "RXO_mvtachi",
98   "RXO_mvtaclo",
99   "RXO_mvfachi",
100   "RXO_mvfacmi",
101   "RXO_mvfaclo",
102   "RXO_racw",
103 
104   "RXO_sat",	/* sat(d) */
105   "RXO_satr",
106 
107   "RXO_fadd",	/* d op= s */
108   "RXO_fcmp",
109   "RXO_fsub",
110   "RXO_ftoi",
111   "RXO_fmul",
112   "RXO_fdiv",
113   "RXO_round",
114   "RXO_itof",
115 
116   "RXO_bset",	/* d |= (1<<s) */
117   "RXO_bclr",	/* d &= ~(1<<s) */
118   "RXO_btst",	/* s & (1<<s2) */
119   "RXO_bnot",	/* d ^= (1<<s) */
120   "RXO_bmcc",	/* d<s> = cond(s2) */
121 
122   "RXO_clrpsw",	/* flag index in d */
123   "RXO_setpsw",	/* flag index in d */
124   "RXO_mvtipl",	/* new IPL in s */
125 
126   "RXO_rtfi",
127   "RXO_rte",
128   "RXO_rtd",	/* undocumented */
129   "RXO_brk",
130   "RXO_dbt",	/* undocumented */
131   "RXO_int",	/* vector id in s */
132   "RXO_stop",
133   "RXO_wait",
134 
135   "RXO_sccnd",	/* d = cond(s) ? 1 : 0 */
136 };
137 
138 static const char * optype_names[] = {
139   " -  ",
140   "#Imm",	/* #addend */
141   " Rn ",	/* Rn */
142   "[Rn]",	/* [Rn + addend] */
143   "Ps++",	/* [Rn+] */
144   "--Pr",	/* [-Rn] */
145   " cc ",	/* eq, gtu, etc */
146   "Flag",	/* [UIOSZC] */
147   "RbRi"	/* [Rb + scale * Ri] */
148 };
149 
150 #define N_RXO (sizeof(id_names)/sizeof(id_names[0]))
151 #define N_RXT (sizeof(optype_names)/sizeof(optype_names[0]))
152 #define N_MAP 30
153 
154 static unsigned long long benchmark_start_cycle;
155 static unsigned long long benchmark_end_cycle;
156 
157 static int op_cache[N_RXT][N_RXT][N_RXT];
158 static int op_cache_rev[N_MAP];
159 static int op_cache_idx = 0;
160 
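/* Map a (dest, src, src2) operand-type triple onto a small index so
   that the per-opcode statistics tables stay a manageable size.  The
   reverse mapping is kept in op_cache_rev for printing.  */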
161 static int
162 op_lookup (int a, int b, int c)
163 {
164   if (op_cache[a][b][c])
165     return op_cache[a][b][c];
166   op_cache_idx ++;
167   if (op_cache_idx >= N_MAP)
168     {
169       printf("op_cache_idx exceeds %d\n", N_MAP);
170       exit(1);
171     }
172   op_cache[a][b][c] = op_cache_idx;
173   op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
174   return op_cache_idx;
175 }
176 
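/* Convert an op_lookup index back into a printable "dst src src2"
   operand-type string.  A small ring of static buffers is used so
   that several results can appear in a single printf call.  */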
177 static char *
178 op_cache_string (int map)
179 {
180   static int ci;
181   static char cb[5][20];
182   int a, b, c;
183 
184   map = op_cache_rev[map];
185   a = (map >> 8) & 15;
186   b = (map >> 4) & 15;
187   c = (map >> 0) & 15;
188   ci = (ci + 1) % 5;
189   sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]);
190   return cb[ci];
191 }
192 
193 static unsigned long long cycles_per_id[N_RXO][N_MAP];
194 static unsigned long long times_per_id[N_RXO][N_MAP];
195 static unsigned long long memory_stalls;
196 static unsigned long long register_stalls;
197 static unsigned long long branch_stalls;
198 static unsigned long long branch_alignment_stalls;
199 static unsigned long long fast_returns;
200 
201 static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
202 static int prev_opcode_id = RXO_unknown;
203 static int po0;
204 
205 #define STATS(x) x
206 
207 #else
208 #define STATS(x)
209 #endif /* CYCLE_STATS */
210 
211 
212 #ifdef CYCLE_ACCURATE
213 
214 static int new_rt = -1;
215 
216 /* Number of cycles to add if an insn spans an 8-byte boundary.  */
217 static int branch_alignment_penalty = 0;
218 
219 #endif
220 
221 static int running_benchmark = 1;
222 
223 #define tprintf if (trace && running_benchmark) printf
224 
225 jmp_buf decode_jmp_buf;
226 unsigned int rx_cycles = 0;
227 
228 #ifdef CYCLE_ACCURATE
229 /* If nonzero, memory was read at some point and cycle latency might
230    take effect.  */
231 static int memory_source = 0;
232 /* If nonzero, memory was written and extra cycles might be
233    needed.  */
234 static int memory_dest = 0;
235 
236 static void
237 cycles (int throughput)
238 {
239   tprintf("%d cycles\n", throughput);
240   regs.cycle_count += throughput;
241 }
242 
243 /* Number of execution (E) cycles the op uses.  For memory sources, we
244    include the load micro-op stall as two extra E cycles.  */
245 #define E(c) cycles (memory_source ? c + 2 : c)
246 #define E1 cycles (1)
247 #define E2 cycles (2)
248 #define EBIT cycles (memory_source ? 2 : 1)
249 
250 /* Check to see if a read latency must be applied for a given register.  */
251 #define RL(r) \
252   if (regs.rt == r )							\
253     {									\
254       tprintf("register %d load stall\n", r);				\
255       regs.cycle_count ++;						\
256       STATS(register_stalls ++);					\
257       regs.rt = -1;							\
258     }
259 
260 #define RLD(r)					\
261   if (memory_source)				\
262     {						\
263       tprintf ("Rt now %d\n", r);		\
264       new_rt = r;				\
265     }
266 
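/* Return roughly half the bit position of the most significant set
   bit in V (treating it as a magnitude if IS_SIGNED).  Used below to
   model the data-dependent latency of the divide instructions.  */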
267 static int
268 lsb_count (unsigned long v, int is_signed)
269 {
270   int i, lsb;
271   if (is_signed && (v & 0x80000000U))
272     v = (unsigned long)(long)(-v);
273   for (i=31; i>=0; i--)
274     if (v & (1 << i))
275       {
276 	/* i is 0..31, we want 1=1-2, 2=3-4, 3=5-6, etc. */
277 	lsb = (i + 2) / 2;
278 	return lsb;
279       }
280   return 0;
281 }
282 
283 static int
284 divu_cycles(unsigned long num, unsigned long den)
285 {
286   int nb = lsb_count (num, 0);
287   int db = lsb_count (den, 0);
288   int rv;
289 
290   if (nb < db)
291     rv = 2;
292   else
293     rv = 3 + nb - db;
294   E (rv);
295   return rv;
296 }
297 
298 static int
299 div_cycles(long num, long den)
300 {
301   int nb = lsb_count ((unsigned long)num, 1);
302   int db = lsb_count ((unsigned long)den, 1);
303   int rv;
304 
305   if (nb < db)
306     rv = 3;
307   else
308     rv = 5 + nb - db;
309   E (rv);
310   return rv;
311 }
312 
313 #else /* !CYCLE_ACCURATE */
314 
315 #define cycles(t)
316 #define E(c)
317 #define E1
318 #define E2
319 #define EBIT
320 #define RL(r)
321 #define RLD(r)
322 
323 #define divu_cycles(n,d)
324 #define div_cycles(n,d)
325 
326 #endif /* else CYCLE_ACCURATE */
327 
328 static int size2bytes[] = {
329   4, 1, 1, 1, 2, 2, 2, 3, 4
330 };
331 
332 typedef struct {
333   unsigned long dpc;
334 } RX_Data;
335 
336 #define rx_abort() _rx_abort(__FILE__, __LINE__)
337 static void
338 _rx_abort (const char *file, int line)
339 {
340   if (strrchr (file, '/'))
341     file = strrchr (file, '/') + 1;
342   fprintf(stderr, "abort at %s:%d\n", file, line);
343   abort();
344 }
345 
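/* Cached pointers into the current fetch page, so instruction bytes
   and their decode-cache slots can be fetched without re-translating
   the address on every access.  */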
346 static unsigned char *get_byte_base;
347 static RX_Opcode_Decoded **decode_cache_base;
348 static SI get_byte_page;
349 
350 void
351 reset_decoder (void)
352 {
353   get_byte_base = 0;
354   decode_cache_base = 0;
355   get_byte_page = 0;
356 }
357 
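/* Refresh the cached page pointers whenever TPC crosses into a
   different page (or whenever counting is enabled, which forces a
   refresh on every call).  */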
358 static inline void
359 maybe_get_mem_page (SI tpc)
360 {
361   if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting)
362     {
363       get_byte_page = tpc & NONPAGE_MASK;
364       get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
365       decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;
366     }
367 }
368 
369 /* This gets called a *lot* so optimize it.  */
370 static int
371 rx_get_byte (void *vdata)
372 {
373   RX_Data *rx_data = (RX_Data *)vdata;
374   SI tpc = rx_data->dpc;
375 
376   /* See load.c for an explanation of this.  */
377   if (rx_big_endian)
378     tpc ^= 3;
379 
380   maybe_get_mem_page (tpc);
381 
382   rx_data->dpc ++;
383   return get_byte_base [tpc];
384 }
385 
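/* Fetch operand I of the decoded opcode RD, handling each addressing
   mode (immediate, register, memory with pre-decrement or
   post-increment, condition, flag) and sign- or zero-extending the
   result to the operand size.  */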
386 static int
387 get_op (const RX_Opcode_Decoded *rd, int i)
388 {
389   const RX_Opcode_Operand *o = rd->op + i;
390   int addr, rv = 0;
391 
392   switch (o->type)
393     {
394     case RX_Operand_None:
395       rx_abort ();
396 
397     case RX_Operand_Immediate:	/* #addend */
398       return o->addend;
399 
400     case RX_Operand_Register:	/* Rn */
401       RL (o->reg);
402       rv = get_reg (o->reg);
403       break;
404 
405     case RX_Operand_Predec:	/* [-Rn] */
406       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
407       /* fall through */
408     case RX_Operand_Postinc:	/* [Rn+] */
409     case RX_Operand_Indirect:	/* [Rn + addend] */
410     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
411 #ifdef CYCLE_ACCURATE
412       RL (o->reg);
413       if (o->type == RX_Operand_TwoReg)
414 	RL (rd->op[2].reg);
415       regs.rt = -1;
416       if (regs.m2m == M2M_BOTH)
417 	{
418 	  tprintf("src memory stall\n");
419 #ifdef CYCLE_STATS
420 	  memory_stalls ++;
421 #endif
422 	  regs.cycle_count ++;
423 	  regs.m2m = 0;
424 	}
425 
426       memory_source = 1;
427 #endif
428 
429       if (o->type == RX_Operand_TwoReg)
430 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
431       else
432 	addr = get_reg (o->reg) + o->addend;
433 
434       switch (o->size)
435 	{
436 	case RX_AnySize:
437 	  rx_abort ();
438 
439 	case RX_Byte: /* undefined extension */
440 	case RX_UByte:
441 	case RX_SByte:
442 	  rv = mem_get_qi (addr);
443 	  break;
444 
445 	case RX_Word: /* undefined extension */
446 	case RX_UWord:
447 	case RX_SWord:
448 	  rv = mem_get_hi (addr);
449 	  break;
450 
451 	case RX_3Byte:
452 	  rv = mem_get_psi (addr);
453 	  break;
454 
455 	case RX_Long:
456 	  rv = mem_get_si (addr);
457 	  break;
458 	}
459 
460       if (o->type == RX_Operand_Postinc)
461 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
462 
463       break;
464 
465     case RX_Operand_Condition:	/* eq, gtu, etc */
466       return condition_true (o->reg);
467 
468     case RX_Operand_Flag:	/* [UIOSZC] */
469       return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
470     }
471 
472   /* if we've gotten here, we need to clip/extend the value according
473      to the size.  */
474   switch (o->size)
475     {
476     case RX_AnySize:
477       rx_abort ();
478 
479     case RX_Byte: /* undefined extension */
480       rv |= 0xdeadbe00; /* keep them honest */
481       break;
482 
483     case RX_UByte:
484       rv &= 0xff;
485       break;
486 
487     case RX_SByte:
488       rv = sign_ext (rv, 8);
489       break;
490 
491     case RX_Word: /* undefined extension */
492       rv |= 0xdead0000; /* keep them honest */
493       break;
494 
495     case RX_UWord:
496       rv &=  0xffff;
497       break;
498 
499     case RX_SWord:
500       rv = sign_ext (rv, 16);
501       break;
502 
503     case RX_3Byte:
504       rv &= 0xffffff;
505       break;
506 
507     case RX_Long:
508       break;
509     }
510   return rv;
511 }
512 
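/* Store V into operand I of the decoded opcode RD, first clipping or
   sign-extending it to the operand size, then writing it to the
   register, memory location or flag that the operand names.  */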
513 static void
514 put_op (const RX_Opcode_Decoded *rd, int i, int v)
515 {
516   const RX_Opcode_Operand *o = rd->op + i;
517   int addr;
518 
519   switch (o->size)
520     {
521     case RX_AnySize:
522       if (o->type != RX_Operand_Register)
523 	rx_abort ();
524       break;
525 
526     case RX_Byte: /* undefined extension */
527       v |= 0xdeadbe00; /* keep them honest */
528       break;
529 
530     case RX_UByte:
531       v &= 0xff;
532       break;
533 
534     case RX_SByte:
535       v = sign_ext (v, 8);
536       break;
537 
538     case RX_Word: /* undefined extension */
539       v |= 0xdead0000; /* keep them honest */
540       break;
541 
542     case RX_UWord:
543       v &=  0xffff;
544       break;
545 
546     case RX_SWord:
547       v = sign_ext (v, 16);
548       break;
549 
550     case RX_3Byte:
551       v &= 0xffffff;
552       break;
553 
554     case RX_Long:
555       break;
556     }
557 
558   switch (o->type)
559     {
560     case RX_Operand_None:
561       /* Opcodes like TST and CMP use this.  */
562       break;
563 
564     case RX_Operand_Immediate:	/* #addend */
565     case RX_Operand_Condition:	/* eq, gtu, etc */
566       rx_abort ();
567 
568     case RX_Operand_Register:	/* Rn */
569       put_reg (o->reg, v);
570       RLD (o->reg);
571       break;
572 
573     case RX_Operand_Predec:	/* [-Rn] */
574       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
575       /* fall through */
576     case RX_Operand_Postinc:	/* [Rn+] */
577     case RX_Operand_Indirect:	/* [Rn + addend] */
578     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
579 
580 #ifdef CYCLE_ACCURATE
581       if (regs.m2m == M2M_BOTH)
582 	{
583 	  tprintf("dst memory stall\n");
584 	  regs.cycle_count ++;
585 #ifdef CYCLE_STATS
586 	  memory_stalls ++;
587 #endif
588 	  regs.m2m = 0;
589 	}
590       memory_dest = 1;
591 #endif
592 
593       if (o->type == RX_Operand_TwoReg)
594 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
595       else
596 	addr = get_reg (o->reg) + o->addend;
597 
598       switch (o->size)
599 	{
600 	case RX_AnySize:
601 	  rx_abort ();
602 
603 	case RX_Byte: /* undefined extension */
604 	case RX_UByte:
605 	case RX_SByte:
606 	  mem_put_qi (addr, v);
607 	  break;
608 
609 	case RX_Word: /* undefined extension */
610 	case RX_UWord:
611 	case RX_SWord:
612 	  mem_put_hi (addr, v);
613 	  break;
614 
615 	case RX_3Byte:
616 	  mem_put_psi (addr, v);
617 	  break;
618 
619 	case RX_Long:
620 	  mem_put_si (addr, v);
621 	  break;
622 	}
623 
624       if (o->type == RX_Operand_Postinc)
625 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
626 
627       break;
628 
629     case RX_Operand_Flag:	/* [UIOSZC] */
630       if (v)
631 	regs.r_psw |= (1 << o->reg);
632       else
633 	regs.r_psw &= ~(1 << o->reg);
634       break;
635     }
636 }
637 
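/* Shorthand accessors for the current opcode's destination (op[0])
   and source (op[1], op[2]) operands, and for the operand size in
   bytes.  */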
638 #define PD(x) put_op (opcode, 0, x)
639 #define PS(x) put_op (opcode, 1, x)
640 #define PS2(x) put_op (opcode, 2, x)
641 #define GD() get_op (opcode, 0)
642 #define GS() get_op (opcode, 1)
643 #define GS2() get_op (opcode, 2)
644 #define DSZ() size2bytes[opcode->op[0].size]
645 #define SSZ() size2bytes[opcode->op[0].size]
646 #define S2SZ() size2bytes[opcode->op[0].size]
647 
648 /* "Universal" sources.  */
649 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
650 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
651 
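/* Push VAL onto the simulated stack, using R0 as the stack pointer.  */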
652 static void
653 push(int val)
654 {
655   int rsp = get_reg (sp);
656   rsp -= 4;
657   put_reg (sp, rsp);
658   mem_put_si (rsp, val);
659 }
660 
661 /* Just like the above, but tag the memory as "pushed pc" so if anyone
662    tries to write to it, it will cause an error.  */
663 static void
664 pushpc(int val)
665 {
666   int rsp = get_reg (sp);
667   rsp -= 4;
668   put_reg (sp, rsp);
669   mem_put_si (rsp, val);
670   mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);
671 }
672 
673 static int
674 pop()
675 {
676   int rv;
677   int rsp = get_reg (sp);
678   rv = mem_get_si (rsp);
679   rsp += 4;
680   put_reg (sp, rsp);
681   return rv;
682 }
683 
684 static int
685 poppc()
686 {
687   int rv;
688   int rsp = get_reg (sp);
689   if (mem_get_content_type (rsp) != MC_PUSHED_PC)
690     execution_error (SIM_ERR_CORRUPT_STACK, rsp);
691   rv = mem_get_si (rsp);
692   mem_set_content_range (rsp, rsp+3, MC_UNINIT);
693   rsp += 4;
694   put_reg (sp, rsp);
695   return rv;
696 }
697 
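/* Binary arithmetic helper.  The operation is performed twice, once
   on the zero-extended operands and once on the sign-extended ones,
   so that both the carry and the overflow inputs to set_oszc can be
   derived.  C is an extra addend (the carry, for ADC and SBB).  */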
698 #define MATH_OP(vop,c)				\
699 { \
700   umb = US2(); \
701   uma = US1(); \
702   ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
703   tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
704   ma = sign_ext (uma, DSZ() * 8);					\
705   mb = sign_ext (umb, DSZ() * 8);					\
706   sll = (long long) ma vop (long long) mb vop c; \
707   tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
708   set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
709   PD (sll); \
710   E (1);    \
711 }
712 
713 #define LOGIC_OP(vop) \
714 { \
715   mb = US2(); \
716   ma = US1(); \
717   v = ma vop mb; \
718   tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
719   set_sz (v, DSZ()); \
720   PD(v); \
721   E (1); \
722 }
723 
724 #define SHIFT_OP(val, type, count, OP, carry_mask)	\
725 { \
726   int i, c=0; \
727   count = US2(); \
728   val = (type)US1();				\
729   tprintf("%lld " #OP " %d\n", val, count); \
730   for (i = 0; i < count; i ++) \
731     { \
732       c = val & carry_mask; \
733       val OP 1; \
734     } \
735   set_oszc (val, 4, c); \
736   PD (val); \
737 }
738 
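/* Single-precision values are passed around in their 32-bit integer
   representation; these helpers reinterpret the bits when a float is
   needed (e.g. for tracing).  */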
739 typedef union {
740   int i;
741   float f;
742 } FloatInt;
743 
744 static inline int
745 float2int (float f)
746 {
747   FloatInt fi;
748   fi.f = f;
749   return fi.i;
750 }
751 
752 static inline float
753 int2float (int i)
754 {
755   FloatInt fi;
756   fi.i = i;
757   return fi.f;
758 }
759 
760 static int
761 fop_fadd (fp_t s1, fp_t s2, fp_t *d)
762 {
763   *d = rxfp_add (s1, s2);
764   return 1;
765 }
766 
767 static int
768 fop_fmul (fp_t s1, fp_t s2, fp_t *d)
769 {
770   *d = rxfp_mul (s1, s2);
771   return 1;
772 }
773 
774 static int
775 fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
776 {
777   *d = rxfp_div (s1, s2);
778   return 1;
779 }
780 
781 static int
782 fop_fsub (fp_t s1, fp_t s2, fp_t *d)
783 {
784   *d = rxfp_sub (s1, s2);
785   return 1;
786 }
787 
788 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
789 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
790 #define FPCHECK() \
791   if (FPPENDING()) \
792     return do_fp_exception (opcode_pc)
793 
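/* Wrapper for the two-operand FP instructions: clear the FPSW flag
   bits, perform the operation via fop_<name>, raise any pending FP
   exception, then store the result and set the S and Z flags from
   it.  */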
794 #define FLOAT_OP(func) \
795 { \
796   int do_store;   \
797   fp_t fa, fb, fc; \
798   FPCLEAR(); \
799   fb = GS (); \
800   fa = GD (); \
801   do_store = fop_##func (fa, fb, &fc); \
802   tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
803   FPCHECK(); \
804   if (do_store) \
805     PD (fc);	\
806   mb = 0; \
807   if ((fc & 0x80000000UL) != 0) \
808     mb |= FLAGBIT_S; \
809   if ((fc & 0x7fffffffUL) == 0)			\
810     mb |= FLAGBIT_Z; \
811   set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
812 }
813 
814 #define carry (FLAG_C ? 1 : 0)
815 
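/* Fixed vector addresses, printable names and host signals for the
   CPU exceptions the simulator can raise, indexed by the EX_* macros
   below.  */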
816 static struct {
817   unsigned long vaddr;
818   const char *str;
819   int signal;
820 } exception_info[] = {
821   { 0xFFFFFFD0UL, "privileged opcode", SIGILL },
822   { 0xFFFFFFD4UL, "access violation", SIGSEGV },
823   { 0xFFFFFFDCUL, "undefined opcode", SIGILL },
824   { 0xFFFFFFE4UL, "floating point", SIGFPE }
825 };
826 #define EX_PRIVILEDGED	0
827 #define EX_ACCESS	1
828 #define EX_UNDEFINED	2
829 #define EX_FLOATING	3
830 #define EXCEPTION(n)  \
831   return generate_exception (n, opcode_pc)
832 
833 #define PRIVILEDGED() \
834   if (FLAG_PM) \
835     EXCEPTION (EX_PRIVILEDGED)
836 
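/* Transfer control to the handler for exception TYPE, saving the old
   PSW and PC on the stack.  If no handler has been installed, report
   the fault and either stop (when running under GDB) or exit.  */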
837 static int
838 generate_exception (unsigned long type, SI opcode_pc)
839 {
840   SI old_psw, old_pc, new_pc;
841 
842   new_pc = mem_get_si (exception_info[type].vaddr);
843   /* 0x00020000 is the value used to initialise the known
844      exception vectors (see rx.ld).  It is a reserved area of
845      memory, so do not try to access it; if the value has not
846      been changed by the program then the vector has not been
847      installed.  */
848   if (new_pc == 0 || new_pc == 0x00020000)
849     {
850       if (rx_in_gdb)
851 	return RX_MAKE_STOPPED (exception_info[type].signal);
852 
853       fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
854 	      exception_info[type].str, (unsigned long) opcode_pc);
855       if (type == EX_FLOATING)
856 	{
857 	  int mask = FPPENDING ();
858 	  fprintf (stderr, "Pending FP exceptions:");
859 	  if (mask & FPSWBITS_FV)
860 	    fprintf(stderr, " Invalid");
861 	  if (mask & FPSWBITS_FO)
862 	    fprintf(stderr, " Overflow");
863 	  if (mask & FPSWBITS_FZ)
864 	    fprintf(stderr, " Division-by-zero");
865 	  if (mask & FPSWBITS_FU)
866 	    fprintf(stderr, " Underflow");
867 	  if (mask & FPSWBITS_FX)
868 	    fprintf(stderr, " Inexact");
869 	  if (mask & FPSWBITS_CE)
870 	    fprintf(stderr, " Unimplemented");
871 	  fprintf(stderr, "\n");
872 	}
873       return RX_MAKE_EXITED (1);
874     }
875 
876   tprintf ("Triggering %s exception\n", exception_info[type].str);
877 
878   old_psw = regs.r_psw;
879   regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
880   old_pc = opcode_pc;
881   regs.r_pc = new_pc;
882   pushpc (old_psw);
883   pushpc (old_pc);
884   return RX_MAKE_STEPPED ();
885 }
886 
887 void
888 generate_access_exception (void)
889 {
890   int rv;
891 
892   rv = generate_exception (EX_ACCESS, regs.r_pc);
893   if (RX_EXITED (rv))
894     longjmp (decode_jmp_buf, rv);
895 }
896 
897 static int
898 do_fp_exception (unsigned long opcode_pc)
899 {
900   while (FPPENDING())
901     EXCEPTION (EX_FLOATING);
902   return RX_MAKE_STEPPED ();
903 }
904 
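/* Return nonzero if operand I of RD refers to memory.  */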
905 static int
906 op_is_memory (const RX_Opcode_Decoded *rd, int i)
907 {
908   switch (rd->op[i].type)
909     {
910     case RX_Operand_Predec:
911     case RX_Operand_Postinc:
912     case RX_Operand_Indirect:
913       return 1;
914     default:
915       return 0;
916     }
917 }
918 #define OM(i) op_is_memory (opcode, i)
919 
920 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
921 
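/* Decode and execute the instruction at regs.r_pc, returning one of
   the RX_MAKE_* status values.  Decoded opcodes are cached per page,
   so instructions that execute repeatedly skip rx_decode_opcode.  */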
922 int
923 decode_opcode ()
924 {
925   unsigned int uma=0, umb=0;
926   int ma=0, mb=0;
927   int opcode_size, v;
928   unsigned long long ll;
929   long long sll;
930   unsigned long opcode_pc;
931   RX_Data rx_data;
932   const RX_Opcode_Decoded *opcode;
933 #ifdef CYCLE_STATS
934   unsigned long long prev_cycle_count;
935 #endif
936 #ifdef CYCLE_ACCURATE
937   unsigned int tx;
938 #endif
939 
940 #ifdef CYCLE_STATS
941   prev_cycle_count = regs.cycle_count;
942 #endif
943 
944 #ifdef CYCLE_ACCURATE
945   memory_source = 0;
946   memory_dest = 0;
947 #endif
948 
949   rx_cycles ++;
950 
951   maybe_get_mem_page (regs.r_pc);
952 
953   opcode_pc = regs.r_pc;
954 
955   /* Note that we don't word-swap at this point; there's no point.  */
956   if (decode_cache_base[opcode_pc] == NULL)
957     {
958       RX_Opcode_Decoded *opcode_w;
959       rx_data.dpc = opcode_pc;
960       opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
961       opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
962 				      rx_get_byte, &rx_data);
963       opcode = opcode_w;
964     }
965   else
966     {
967       opcode = decode_cache_base[opcode_pc];
968       opcode_size = opcode->n_bytes;
969     }
970 
971 #ifdef CYCLE_ACCURATE
972   if (branch_alignment_penalty)
973     {
974       if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
975 	{
976 	  tprintf("1 cycle branch alignment penalty\n");
977 	  cycles (branch_alignment_penalty);
978 #ifdef CYCLE_STATS
979 	  branch_alignment_stalls ++;
980 #endif
981 	}
982       branch_alignment_penalty = 0;
983     }
984 #endif
985 
986   regs.r_pc += opcode_size;
987 
988   rx_flagmask = opcode->flags_s;
989   rx_flagand = ~(int)opcode->flags_0;
990   rx_flagor = opcode->flags_1;
991 
992   switch (opcode->id)
993     {
994     case RXO_abs:
995       sll = GS ();
996       tprintf("|%lld| = ", sll);
997       if (sll < 0)
998 	sll = -sll;
999       tprintf("%lld\n", sll);
1000       PD (sll);
1001       set_osz (sll, 4);
1002       E (1);
1003       break;
1004 
1005     case RXO_adc:
1006       MATH_OP (+,carry);
1007       break;
1008 
1009     case RXO_add:
1010       MATH_OP (+,0);
1011       break;
1012 
1013     case RXO_and:
1014       LOGIC_OP (&);
1015       break;
1016 
1017     case RXO_bclr:
1018       ma = GD ();
1019       mb = GS ();
1020       if (opcode->op[0].type == RX_Operand_Register)
1021 	mb &= 0x1f;
1022       else
1023 	mb &= 0x07;
1024       ma &= ~(1 << mb);
1025       PD (ma);
1026       EBIT;
1027       break;
1028 
1029     case RXO_bmcc:
1030       ma = GD ();
1031       mb = GS ();
1032       if (opcode->op[0].type == RX_Operand_Register)
1033 	mb &= 0x1f;
1034       else
1035 	mb &= 0x07;
1036       if (GS2 ())
1037 	ma |= (1 << mb);
1038       else
1039 	ma &= ~(1 << mb);
1040       PD (ma);
1041       EBIT;
1042       break;
1043 
1044     case RXO_bnot:
1045       ma = GD ();
1046       mb = GS ();
1047       if (opcode->op[0].type == RX_Operand_Register)
1048 	mb &= 0x1f;
1049       else
1050 	mb &= 0x07;
1051       ma ^= (1 << mb);
1052       PD (ma);
1053       EBIT;
1054       break;
1055 
1056     case RXO_branch:
1057       if (opcode->op[1].type == RX_Operand_None || GS())
1058 	{
1059 #ifdef CYCLE_ACCURATE
1060 	  SI old_pc = regs.r_pc;
1061 	  int delta;
1062 #endif
1063 	  regs.r_pc = GD();
1064 #ifdef CYCLE_ACCURATE
1065 	  delta = regs.r_pc - old_pc;
1066 	  if (delta >= 0 && delta < 16
1067 	      && opcode_size > 1)
1068 	    {
1069 	      tprintf("near forward branch bonus\n");
1070 	      cycles (2);
1071 	    }
1072 	  else
1073 	    {
1074 	      cycles (3);
1075 	      branch_alignment_penalty = 1;
1076 	    }
1077 #ifdef CYCLE_STATS
1078 	  branch_stalls ++;
1079 #endif
1080 #endif
1081 	}
1082 #ifdef CYCLE_ACCURATE
1083       else
1084 	cycles (1);
1085 #endif
1086       break;
1087 
1088     case RXO_branchrel:
1089       if (opcode->op[1].type == RX_Operand_None || GS())
1090 	{
1091 	  int delta = GD();
1092 	  regs.r_pc = opcode_pc + delta;
1093 #ifdef CYCLE_ACCURATE
1094 	  /* Note: specs say 3, chip says 2.  */
1095 	  if (delta >= 0 && delta < 16
1096 	      && opcode_size > 1)
1097 	    {
1098 	      tprintf("near forward branch bonus\n");
1099 	      cycles (2);
1100 	    }
1101 	  else
1102 	    {
1103 	      cycles (3);
1104 	      branch_alignment_penalty = 1;
1105 	    }
1106 #ifdef CYCLE_STATS
1107 	  branch_stalls ++;
1108 #endif
1109 #endif
1110 	}
1111 #ifdef CYCLE_ACCURATE
1112       else
1113 	cycles (1);
1114 #endif
1115       break;
1116 
1117     case RXO_brk:
1118       {
1119 	int old_psw = regs.r_psw;
1120 	if (rx_in_gdb)
1121 	  DO_RETURN (RX_MAKE_HIT_BREAK ());
1122 	if (regs.r_intb == 0)
1123 	  {
1124 	    tprintf("BREAK hit, no vector table.\n");
1125 	    DO_RETURN (RX_MAKE_EXITED(1));
1126 	  }
1127 	regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1128 	pushpc (old_psw);
1129 	pushpc (regs.r_pc);
1130 	regs.r_pc = mem_get_si (regs.r_intb);
1131 	cycles(6);
1132       }
1133       break;
1134 
1135     case RXO_bset:
1136       ma = GD ();
1137       mb = GS ();
1138       if (opcode->op[0].type == RX_Operand_Register)
1139 	mb &= 0x1f;
1140       else
1141 	mb &= 0x07;
1142       ma |= (1 << mb);
1143       PD (ma);
1144       EBIT;
1145       break;
1146 
1147     case RXO_btst:
1148       ma = GS ();
1149       mb = GS2 ();
1150       if (opcode->op[1].type == RX_Operand_Register)
1151 	mb &= 0x1f;
1152       else
1153 	mb &= 0x07;
1154       umb = ma & (1 << mb);
1155       set_zc (! umb, umb);
1156       EBIT;
1157       break;
1158 
1159     case RXO_clrpsw:
1160       v = 1 << opcode->op[0].reg;
1161       if (FLAG_PM
1162 	  && (v == FLAGBIT_I
1163 	      || v == FLAGBIT_U))
1164 	break;
1165       regs.r_psw &= ~v;
1166       cycles (1);
1167       break;
1168 
1169     case RXO_div: /* d = d / s */
1170       ma = GS();
1171       mb = GD();
1172       tprintf("%d / %d = ", mb, ma);
1173       if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
1174 	{
1175 	  tprintf("#NAN\n");
1176 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1177 	  cycles (3);
1178 	}
1179       else
1180 	{
1181 	  v = mb/ma;
1182 	  tprintf("%d\n", v);
1183 	  set_flags (FLAGBIT_O, 0);
1184 	  PD (v);
1185 	  div_cycles (mb, ma);
1186 	}
1187       break;
1188 
1189     case RXO_divu: /* d = d / s */
1190       uma = GS();
1191       umb = GD();
1192       tprintf("%u / %u = ", umb, uma);
1193       if (uma == 0)
1194 	{
1195 	  tprintf("#NAN\n");
1196 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1197 	  cycles (2);
1198 	}
1199       else
1200 	{
1201 	  v = umb / uma;
1202 	  tprintf("%u\n", v);
1203 	  set_flags (FLAGBIT_O, 0);
1204 	  PD (v);
1205 	  divu_cycles (umb, uma);
1206 	}
1207       break;
1208 
1209     case RXO_emul:
1210       ma = GD ();
1211       mb = GS ();
1212       sll = (long long)ma * (long long)mb;
1213       tprintf("%d * %d = %lld\n", ma, mb, sll);
1214       put_reg (opcode->op[0].reg, sll);
1215       put_reg (opcode->op[0].reg + 1, sll >> 32);
1216       E2;
1217       break;
1218 
1219     case RXO_emulu:
1220       uma = GD ();
1221       umb = GS ();
1222       ll = (long long)uma * (long long)umb;
1223       tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
1224       put_reg (opcode->op[0].reg, ll);
1225       put_reg (opcode->op[0].reg + 1, ll >> 32);
1226       E2;
1227       break;
1228 
1229     case RXO_fadd:
1230       FLOAT_OP (fadd);
1231       E (4);
1232       break;
1233 
1234     case RXO_fcmp:
1235       ma = GD();
1236       mb = GS();
1237       FPCLEAR ();
1238       rxfp_cmp (ma, mb);
1239       FPCHECK ();
1240       E (1);
1241       break;
1242 
1243     case RXO_fdiv:
1244       FLOAT_OP (fdiv);
1245       E (16);
1246       break;
1247 
1248     case RXO_fmul:
1249       FLOAT_OP (fmul);
1250       E (3);
1251       break;
1252 
1253     case RXO_rtfi:
1254       PRIVILEDGED ();
1255       regs.r_psw = regs.r_bpsw;
1256       regs.r_pc = regs.r_bpc;
1257 #ifdef CYCLE_ACCURATE
1258       regs.fast_return = 0;
1259       cycles(3);
1260 #endif
1261       break;
1262 
1263     case RXO_fsub:
1264       FLOAT_OP (fsub);
1265       E (4);
1266       break;
1267 
1268     case RXO_ftoi:
1269       ma = GS ();
1270       FPCLEAR ();
1271       mb = rxfp_ftoi (ma, FPRM_ZERO);
1272       FPCHECK ();
1273       PD (mb);
1274       tprintf("(int) %g = %d\n", int2float(ma), mb);
1275       set_sz (mb, 4);
1276       E (2);
1277       break;
1278 
1279     case RXO_int:
1280       v = GS ();
1281       if (v == 255)
1282 	{
1283 	  int rc = rx_syscall (regs.r[5]);
1284 	  if (! RX_STEPPED (rc))
1285 	    DO_RETURN (rc);
1286 	}
1287       else
1288 	{
1289 	  int old_psw = regs.r_psw;
1290 	  regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1291 	  pushpc (old_psw);
1292 	  pushpc (regs.r_pc);
1293 	  regs.r_pc = mem_get_si (regs.r_intb + 4 * v);
1294 	}
1295       cycles (6);
1296       break;
1297 
1298     case RXO_itof:
1299       ma = GS ();
1300       FPCLEAR ();
1301       mb = rxfp_itof (ma, regs.r_fpsw);
1302       FPCHECK ();
1303       tprintf("(float) %d = %x\n", ma, mb);
1304       PD (mb);
1305       set_sz (ma, 4);
1306       E (2);
1307       break;
1308 
1309     case RXO_jsr:
1310     case RXO_jsrrel:
1311       {
1312 #ifdef CYCLE_ACCURATE
1313 	int delta;
1314 	regs.m2m = 0;
1315 #endif
1316 	v = GD ();
1317 #ifdef CYCLE_ACCURATE
1318 	regs.link_register = regs.r_pc;
1319 #endif
1320 	pushpc (get_reg (pc));
1321 	if (opcode->id == RXO_jsrrel)
1322 	  v += regs.r_pc;
1323 #ifdef CYCLE_ACCURATE
1324 	delta = v - regs.r_pc;
1325 #endif
1326 	put_reg (pc, v);
1327 #ifdef CYCLE_ACCURATE
1328 	/* Note: docs say 3, chip says 2 */
1329 	if (delta >= 0 && delta < 16)
1330 	  {
1331 	    tprintf ("near forward jsr bonus\n");
1332 	    cycles (2);
1333 	  }
1334 	else
1335 	  {
1336 	    branch_alignment_penalty = 1;
1337 	    cycles (3);
1338 	  }
1339 	regs.fast_return = 1;
1340 #endif
1341       }
1342       break;
1343 
1344     case RXO_machi:
1345       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1346       ll <<= 16;
1347       put_reg64 (acc64, ll + regs.r_acc);
1348       E1;
1349       break;
1350 
1351     case RXO_maclo:
1352       ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1353       ll <<= 16;
1354       put_reg64 (acc64, ll + regs.r_acc);
1355       E1;
1356       break;
1357 
1358     case RXO_max:
1359       mb = GS();
1360       ma = GD();
1361       if (ma > mb)
1362 	PD (ma);
1363       else
1364 	PD (mb);
1365       E (1);
1366       break;
1367 
1368     case RXO_min:
1369       mb = GS();
1370       ma = GD();
1371       if (ma < mb)
1372 	PD (ma);
1373       else
1374 	PD (mb);
1375       E (1);
1376       break;
1377 
1378     case RXO_mov:
1379       v = GS ();
1380 
1381       if (opcode->op[1].type == RX_Operand_Register
1382 	  && opcode->op[1].reg == 17 /* PC */)
1383 	{
1384 	  /* Special case.  We want the address of the insn, not the
1385 	     address of the next insn.  */
1386 	  v = opcode_pc;
1387 	}
1388 
1389       if (opcode->op[0].type == RX_Operand_Register
1390 	  && opcode->op[0].reg == 16 /* PSW */)
1391 	{
1392 	  /* Special case, LDC and POPC can't ever modify PM.  */
1393 	  int pm = regs.r_psw & FLAGBIT_PM;
1394 	  v &= ~ FLAGBIT_PM;
1395 	  v |= pm;
1396 	  if (pm)
1397 	    {
1398 	      v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1399 	      v |= pm;
1400 	    }
1401 	}
1402       if (FLAG_PM)
1403 	{
1404 	  /* various things can't be changed in user mode.  */
1405 	  if (opcode->op[0].type == RX_Operand_Register)
1406 	    if (opcode->op[0].reg == 32)
1407 	      {
1408 		v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1409 		v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1410 	      }
1411 	  if (opcode->op[0].reg == 34 /* ISP */
1412 	      || opcode->op[0].reg == 37 /* BPSW */
1413 	      || opcode->op[0].reg == 39 /* INTB */
1414 	      || opcode->op[0].reg == 38 /* VCT */)
1415 	    /* These are ignored.  */
1416 	    break;
1417 	}
1418       if (OM(0) && OM(1))
1419 	cycles (2);
1420       else
1421 	cycles (1);
1422 
1423       PD (v);
1424 
1425 #ifdef CYCLE_ACCURATE
1426       if ((opcode->op[0].type == RX_Operand_Predec
1427 	   && opcode->op[1].type == RX_Operand_Register)
1428 	  || (opcode->op[0].type == RX_Operand_Postinc
1429 	      && opcode->op[1].type == RX_Operand_Register))
1430 	{
1431 	  /* Special case: push reg doesn't cause a memory stall.  */
1432 	  memory_dest = 0;
1433 	  tprintf("push special case\n");
1434 	}
1435 #endif
1436 
1437       set_sz (v, DSZ());
1438       break;
1439 
1440     case RXO_movbi:
1441       PD (GS ());
1442       cycles (1);
1443       break;
1444 
1445     case RXO_movbir:
1446       PS (GD ());
1447       cycles (1);
1448       break;
1449 
1450     case RXO_mul:
1451       v = US2 ();
1452       ll = (unsigned long long) US1() * (unsigned long long) v;
1453       PD(ll);
1454       E (1);
1455       break;
1456 
1457     case RXO_mulhi:
1458       v = GS2 ();
1459       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16);
1460       ll <<= 16;
1461       put_reg64 (acc64, ll);
1462       E1;
1463       break;
1464 
1465     case RXO_mullo:
1466       v = GS2 ();
1467       ll = (long long)(signed short)(GS()) * (long long)(signed short)(v);
1468       ll <<= 16;
1469       put_reg64 (acc64, ll);
1470       E1;
1471       break;
1472 
1473     case RXO_mvfachi:
1474       PD (get_reg (acchi));
1475       E1;
1476       break;
1477 
1478     case RXO_mvfaclo:
1479       PD (get_reg (acclo));
1480       E1;
1481       break;
1482 
1483     case RXO_mvfacmi:
1484       PD (get_reg (accmi));
1485       E1;
1486       break;
1487 
1488     case RXO_mvtachi:
1489       put_reg (acchi, GS ());
1490       E1;
1491       break;
1492 
1493     case RXO_mvtaclo:
1494       put_reg (acclo, GS ());
1495       E1;
1496       break;
1497 
1498     case RXO_mvtipl:
1499       regs.r_psw &= ~ FLAGBITS_IPL;
1500       regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;
1501       E1;
1502       break;
1503 
1504     case RXO_nop:
1505     case RXO_nop2:
1506     case RXO_nop3:
1507       E1;
1508       break;
1509 
1510     case RXO_or:
1511       LOGIC_OP (|);
1512       break;
1513 
1514     case RXO_popm:
1515       /* POPM cannot pop R0 (sp).  */
1516       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1517 	EXCEPTION (EX_UNDEFINED);
1518       if (opcode->op[1].reg >= opcode->op[2].reg)
1519 	{
1520 	  regs.r_pc = opcode_pc;
1521 	  DO_RETURN (RX_MAKE_STOPPED (SIGILL));
1522 	}
1523       for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++)
1524 	{
1525 	  cycles (1);
1526 	  RLD (v);
1527 	  put_reg (v, pop ());
1528 	}
1529       break;
1530 
1531     case RXO_pushm:
1532       /* PUSHM cannot push R0 (sp).  */
1533       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1534 	EXCEPTION (EX_UNDEFINED);
1535       if (opcode->op[1].reg >= opcode->op[2].reg)
1536 	{
1537 	  regs.r_pc = opcode_pc;
1538 	  return RX_MAKE_STOPPED (SIGILL);
1539 	}
1540       for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--)
1541 	{
1542 	  RL (v);
1543 	  push (get_reg (v));
1544 	}
1545       cycles (opcode->op[2].reg - opcode->op[1].reg + 1);
1546       break;
1547 
1548     case RXO_racw:
1549       ll = get_reg64 (acc64) << GS ();
1550       ll += 0x80000000ULL;
1551       if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
1552 	ll = 0x00007fff00000000ULL;
1553       else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
1554 	ll = 0xffff800000000000ULL;
1555       else
1556 	ll &= 0xffffffff00000000ULL;
1557       put_reg64 (acc64, ll);
1558       E1;
1559       break;
1560 
1561     case RXO_rte:
1562       PRIVILEDGED ();
1563       regs.r_pc = poppc ();
1564       regs.r_psw = poppc ();
1565       if (FLAG_PM)
1566 	regs.r_psw |= FLAGBIT_U;
1567 #ifdef CYCLE_ACCURATE
1568       regs.fast_return = 0;
1569       cycles (6);
1570 #endif
1571       break;
1572 
1573     case RXO_revl:
1574       uma = GS ();
1575       umb = (((uma >> 24) & 0xff)
1576 	     | ((uma >> 8) & 0xff00)
1577 	     | ((uma << 8) & 0xff0000)
1578 	     | ((uma << 24) & 0xff000000UL));
1579       PD (umb);
1580       E1;
1581       break;
1582 
1583     case RXO_revw:
1584       uma = GS ();
1585       umb = (((uma >> 8) & 0x00ff00ff)
1586 	     | ((uma << 8) & 0xff00ff00UL));
1587       PD (umb);
1588       E1;
1589       break;
1590 
1591     case RXO_rmpa:
1592       RL(4);
1593       RL(5);
1594 #ifdef CYCLE_ACCURATE
1595       tx = regs.r[3];
1596 #endif
1597 
1598       while (regs.r[3] != 0)
1599 	{
1600 	  long long tmp;
1601 
1602 	  switch (opcode->size)
1603 	    {
1604 	    case RX_Long:
1605 	      ma = mem_get_si (regs.r[1]);
1606 	      mb = mem_get_si (regs.r[2]);
1607 	      regs.r[1] += 4;
1608 	      regs.r[2] += 4;
1609 	      break;
1610 	    case RX_Word:
1611 	      ma = sign_ext (mem_get_hi (regs.r[1]), 16);
1612 	      mb = sign_ext (mem_get_hi (regs.r[2]), 16);
1613 	      regs.r[1] += 2;
1614 	      regs.r[2] += 2;
1615 	      break;
1616 	    case RX_Byte:
1617 	      ma = sign_ext (mem_get_qi (regs.r[1]), 8);
1618 	      mb = sign_ext (mem_get_qi (regs.r[2]), 8);
1619 	      regs.r[1] += 1;
1620 	      regs.r[2] += 1;
1621 	      break;
1622 	    default:
1623 	      abort ();
1624 	    }
1625 	  /* We do the multiply as a signed value.  */
1626 	  sll = (long long)ma * (long long)mb;
1627 	  tprintf("        %016llx = %d * %d\n", sll, ma, mb);
1628 	  /* but we do the sum as unsigned, while sign extending the operands.  */
1629 	  tmp = regs.r[4] + (sll & 0xffffffffUL);
1630 	  regs.r[4] = tmp & 0xffffffffUL;
1631 	  tmp >>= 32;
1632 	  sll >>= 32;
1633 	  tmp += regs.r[5] + (sll & 0xffffffffUL);
1634 	  regs.r[5] = tmp & 0xffffffffUL;
1635 	  tmp >>= 32;
1636 	  sll >>= 32;
1637 	  tmp += regs.r[6] + (sll & 0xffffffffUL);
1638 	  regs.r[6] = tmp & 0xffffffffUL;
1639 	  tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1640 		  (unsigned long) regs.r[6],
1641 		  (unsigned long) regs.r[5],
1642 		  (unsigned long) regs.r[4]);
1643 
1644 	  regs.r[3] --;
1645 	}
1646       if (regs.r[6] & 0x00008000)
1647 	regs.r[6] |= 0xffff0000UL;
1648       else
1649 	regs.r[6] &= 0x0000ffff;
1650       ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
1651       if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
1652 	set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
1653       else
1654 	set_flags (FLAGBIT_O|FLAGBIT_S, ma);
1655 #ifdef CYCLE_ACCURATE
1656       switch (opcode->size)
1657 	{
1658 	case RX_Long:
1659 	  cycles (6 + 4 * tx);
1660 	  break;
1661 	case RX_Word:
1662 	  cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
1663 	  break;
1664 	case RX_Byte:
1665 	  cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
1666 	  break;
1667 	default:
1668 	  abort ();
1669 	}
1670 #endif
1671       break;
1672 
1673     case RXO_rolc:
1674       v = GD ();
1675       ma = v & 0x80000000UL;
1676       v <<= 1;
1677       v |= carry;
1678       set_szc (v, 4, ma);
1679       PD (v);
1680       E1;
1681       break;
1682 
1683     case RXO_rorc:
1684       uma = GD ();
1685       mb = uma & 1;
1686       uma >>= 1;
1687       uma |= (carry ? 0x80000000UL : 0);
1688       set_szc (uma, 4, mb);
1689       PD (uma);
1690       E1;
1691       break;
1692 
1693     case RXO_rotl:
1694       mb = GS ();
1695       uma = GD ();
1696       if (mb)
1697 	{
1698 	  uma = (uma << mb) | (uma >> (32-mb));
1699 	  mb = uma & 1;
1700 	}
1701       set_szc (uma, 4, mb);
1702       PD (uma);
1703       E1;
1704       break;
1705 
1706     case RXO_rotr:
1707       mb = GS ();
1708       uma = GD ();
1709       if (mb)
1710 	{
1711 	  uma = (uma >> mb) | (uma << (32-mb));
1712 	  mb = uma & 0x80000000;
1713 	}
1714       set_szc (uma, 4, mb);
1715       PD (uma);
1716       E1;
1717       break;
1718 
1719     case RXO_round:
1720       ma = GS ();
1721       FPCLEAR ();
1722       mb = rxfp_ftoi (ma, regs.r_fpsw);
1723       FPCHECK ();
1724       PD (mb);
1725       tprintf("(int) %g = %d\n", int2float(ma), mb);
1726       set_sz (mb, 4);
1727       E (2);
1728       break;
1729 
1730     case RXO_rts:
1731       {
1732 #ifdef CYCLE_ACCURATE
1733 	int cyc = 5;
1734 #endif
1735 	regs.r_pc = poppc ();
1736 #ifdef CYCLE_ACCURATE
1737 	/* Note: specs say 5, chip says 3.  */
1738 	if (regs.fast_return && regs.link_register == regs.r_pc)
1739 	  {
1740 #ifdef CYCLE_STATS
1741 	    fast_returns ++;
1742 #endif
1743 	    tprintf("fast return bonus\n");
1744 	    cyc -= 2;
1745 	  }
1746 	cycles (cyc);
1747 	regs.fast_return = 0;
1748 	branch_alignment_penalty = 1;
1749 #endif
1750       }
1751       break;
1752 
1753     case RXO_rtsd:
1754       if (opcode->op[2].type == RX_Operand_Register)
1755 	{
1756 	  int i;
1757 	  /* RTSD cannot pop R0 (sp).  */
1758 	  put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
1759 	  if (opcode->op[2].reg == 0)
1760 	    EXCEPTION (EX_UNDEFINED);
1761 #ifdef CYCLE_ACCURATE
1762 	  tx = opcode->op[0].reg - opcode->op[2].reg + 1;
1763 #endif
1764 	  for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
1765 	    {
1766 	      RLD (i);
1767 	      put_reg (i, pop ());
1768 	    }
1769 	}
1770       else
1771 	{
1772 #ifdef CYCLE_ACCURATE
1773 	  tx = 0;
1774 #endif
1775 	  put_reg (0, get_reg (0) + GS());
1776 	}
1777       put_reg (pc, poppc());
1778 #ifdef CYCLE_ACCURATE
1779       if (regs.fast_return && regs.link_register == regs.r_pc)
1780 	{
1781 	  tprintf("fast return bonus\n");
1782 #ifdef CYCLE_STATS
1783 	  fast_returns ++;
1784 #endif
1785 	  cycles (tx < 3 ? 3 : tx + 1);
1786 	}
1787       else
1788 	{
1789 	  cycles (tx < 5 ? 5 : tx + 1);
1790 	}
1791       regs.fast_return = 0;
1792       branch_alignment_penalty = 1;
1793 #endif
1794       break;
1795 
1796     case RXO_sat:
1797       if (FLAG_O && FLAG_S)
1798 	PD (0x7fffffffUL);
1799       else if (FLAG_O && ! FLAG_S)
1800 	PD (0x80000000UL);
1801       E1;
1802       break;
1803 
1804     case RXO_satr:
1805       if (FLAG_O && ! FLAG_S)
1806 	{
1807 	  put_reg (6, 0x0);
1808 	  put_reg (5, 0x7fffffff);
1809 	  put_reg (4, 0xffffffff);
1810 	}
1811       else if (FLAG_O && FLAG_S)
1812 	{
1813 	  put_reg (6, 0xffffffff);
1814 	  put_reg (5, 0x80000000);
1815 	  put_reg (4, 0x0);
1816 	}
1817       E1;
1818       break;
1819 
1820     case RXO_sbb:
1821       MATH_OP (-, ! carry);
1822       break;
1823 
1824     case RXO_sccnd:
1825       if (GS())
1826 	PD (1);
1827       else
1828 	PD (0);
1829       E1;
1830       break;
1831 
1832     case RXO_scmpu:
1833 #ifdef CYCLE_ACCURATE
1834       tx = regs.r[3];
1835 #endif
1836       while (regs.r[3] != 0)
1837 	{
1838 	  uma = mem_get_qi (regs.r[1] ++);
1839 	  umb = mem_get_qi (regs.r[2] ++);
1840 	  regs.r[3] --;
1841 	  if (uma != umb || uma == 0)
1842 	    break;
1843 	}
1844       if (uma == umb)
1845 	set_zc (1, 1);
1846       else
1847 	set_zc (0, ((int)uma - (int)umb) >= 0);
1848       cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));
1849       break;
1850 
1851     case RXO_setpsw:
1852       v = 1 << opcode->op[0].reg;
1853       if (FLAG_PM
1854 	  && (v == FLAGBIT_I
1855 	      || v == FLAGBIT_U))
1856 	break;
1857       regs.r_psw |= v;
1858       cycles (1);
1859       break;
1860 
1861     case RXO_smovb:
1862       RL (3);
1863 #ifdef CYCLE_ACCURATE
1864       tx = regs.r[3];
1865 #endif
1866       while (regs.r[3])
1867 	{
1868 	  uma = mem_get_qi (regs.r[2] --);
1869 	  mem_put_qi (regs.r[1]--, uma);
1870 	  regs.r[3] --;
1871 	}
1872 #ifdef CYCLE_ACCURATE
1873       if (tx > 3)
1874 	cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
1875       else
1876 	cycles (2 + 3 * (tx % 4));
1877 #endif
1878       break;
1879 
1880     case RXO_smovf:
1881       RL (3);
1882 #ifdef CYCLE_ACCURATE
1883       tx = regs.r[3];
1884 #endif
1885       while (regs.r[3])
1886 	{
1887 	  uma = mem_get_qi (regs.r[2] ++);
1888 	  mem_put_qi (regs.r[1]++, uma);
1889 	  regs.r[3] --;
1890 	}
1891       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1892       break;
1893 
1894     case RXO_smovu:
1895 #ifdef CYCLE_ACCURATE
1896       tx = regs.r[3];
1897 #endif
1898       while (regs.r[3] != 0)
1899 	{
1900 	  uma = mem_get_qi (regs.r[2] ++);
1901 	  mem_put_qi (regs.r[1]++, uma);
1902 	  regs.r[3] --;
1903 	  if (uma == 0)
1904 	    break;
1905 	}
1906       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1907       break;
1908 
1909     case RXO_shar: /* d = ma >> mb */
1910       SHIFT_OP (sll, int, mb, >>=, 1);
1911       E (1);
1912       break;
1913 
1914     case RXO_shll: /* d = ma << mb */
1915       SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);
1916       E (1);
1917       break;
1918 
1919     case RXO_shlr: /* d = ma >> mb */
1920       SHIFT_OP (ll, unsigned int, mb, >>=, 1);
1921       E (1);
1922       break;
1923 
1924     case RXO_sstr:
1925       RL (3);
1926 #ifdef CYCLE_ACCURATE
1927       tx = regs.r[3];
1928 #endif
1929       switch (opcode->size)
1930 	{
1931 	case RX_Long:
1932 	  while (regs.r[3] != 0)
1933 	    {
1934 	      mem_put_si (regs.r[1], regs.r[2]);
1935 	      regs.r[1] += 4;
1936 	      regs.r[3] --;
1937 	    }
1938 	  cycles (2 + tx);
1939 	  break;
1940 	case RX_Word:
1941 	  while (regs.r[3] != 0)
1942 	    {
1943 	      mem_put_hi (regs.r[1], regs.r[2]);
1944 	      regs.r[1] += 2;
1945 	      regs.r[3] --;
1946 	    }
1947 	  cycles (2 + (int)(tx / 2) + tx % 2);
1948 	  break;
1949 	case RX_Byte:
1950 	  while (regs.r[3] != 0)
1951 	    {
1952 	      mem_put_qi (regs.r[1], regs.r[2]);
1953 	      regs.r[1] ++;
1954 	      regs.r[3] --;
1955 	    }
1956 	  cycles (2 + (int)(tx / 4) + tx % 4);
1957 	  break;
1958 	default:
1959 	  abort ();
1960 	}
1961       break;
1962 
1963     case RXO_stcc:
1964       if (GS2())
1965 	PD (GS ());
1966       E1;
1967       break;
1968 
1969     case RXO_stop:
1970       PRIVILEDGED ();
1971       regs.r_psw |= FLAGBIT_I;
1972       DO_RETURN (RX_MAKE_STOPPED(0));
1973 
1974     case RXO_sub:
1975       MATH_OP (-, 0);
1976       break;
1977 
1978     case RXO_suntil:
1979       RL(3);
1980 #ifdef CYCLE_ACCURATE
1981       tx = 0;
1982 #endif
1983       if (regs.r[3] == 0)
1984 	{
1985 	  cycles (3);
1986 	  break;
1987 	}
1988       switch (opcode->size)
1989 	{
1990 	case RX_Long:
1991 	  uma = get_reg (2);
1992 	  while (regs.r[3] != 0)
1993 	    {
1994 	      regs.r[3] --;
1995 	      umb = mem_get_si (get_reg (1));
1996 	      regs.r[1] += 4;
1997 #ifdef CYCLE_ACCURATE
1998 	      tx ++;
1999 #endif
2000 	      if (umb == uma)
2001 		break;
2002 	    }
2003 #ifdef CYCLE_ACCURATE
2004 	  cycles (3 + 3 * tx);
2005 #endif
2006 	  break;
2007 	case RX_Word:
2008 	  uma = get_reg (2) & 0xffff;
2009 	  while (regs.r[3] != 0)
2010 	    {
2011 	      regs.r[3] --;
2012 	      umb = mem_get_hi (get_reg (1));
2013 	      regs.r[1] += 2;
2014 #ifdef CYCLE_ACCURATE
2015 	      tx ++;
2016 #endif
2017 	      if (umb == uma)
2018 		break;
2019 	    }
2020 #ifdef CYCLE_ACCURATE
2021 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2022 #endif
2023 	  break;
2024 	case RX_Byte:
2025 	  uma = get_reg (2) & 0xff;
2026 	  while (regs.r[3] != 0)
2027 	    {
2028 	      regs.r[3] --;
2029 	      umb = mem_get_qi (regs.r[1]);
2030 	      regs.r[1] += 1;
2031 #ifdef CYCLE_ACCURATE
2032 	      tx ++;
2033 #endif
2034 	      if (umb == uma)
2035 		break;
2036 	    }
2037 #ifdef CYCLE_ACCURATE
2038 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2039 #endif
2040 	  break;
2041 	default:
2042 	  abort();
2043 	}
2044       if (uma == umb)
2045 	set_zc (1, 1);
2046       else
2047 	set_zc (0, ((int)uma - (int)umb) >= 0);
2048       break;
2049 
2050     case RXO_swhile:
2051       RL(3);
2052 #ifdef CYCLE_ACCURATE
2053       tx = 0;
2054 #endif
2055       if (regs.r[3] == 0)
2056 	break;
2057       switch (opcode->size)
2058 	{
2059 	case RX_Long:
2060 	  uma = get_reg (2);
2061 	  while (regs.r[3] != 0)
2062 	    {
2063 	      regs.r[3] --;
2064 	      umb = mem_get_si (get_reg (1));
2065 	      regs.r[1] += 4;
2066 #ifdef CYCLE_ACCURATE
2067 	      tx ++;
2068 #endif
2069 	      if (umb != uma)
2070 		break;
2071 	    }
2072 #ifdef CYCLE_ACCURATE
2073 	  cycles (3 + 3 * tx);
2074 #endif
2075 	  break;
2076 	case RX_Word:
2077 	  uma = get_reg (2) & 0xffff;
2078 	  while (regs.r[3] != 0)
2079 	    {
2080 	      regs.r[3] --;
2081 	      umb = mem_get_hi (get_reg (1));
2082 	      regs.r[1] += 2;
2083 #ifdef CYCLE_ACCURATE
2084 	      tx ++;
2085 #endif
2086 	      if (umb != uma)
2087 		break;
2088 	    }
2089 #ifdef CYCLE_ACCURATE
2090 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2091 #endif
2092 	  break;
2093 	case RX_Byte:
2094 	  uma = get_reg (2) & 0xff;
2095 	  while (regs.r[3] != 0)
2096 	    {
2097 	      regs.r[3] --;
2098 	      umb = mem_get_qi (regs.r[1]);
2099 	      regs.r[1] += 1;
2100 #ifdef CYCLE_ACCURATE
2101 	      tx ++;
2102 #endif
2103 	      if (umb != uma)
2104 		break;
2105 	    }
2106 #ifdef CYCLE_ACCURATE
2107 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2108 #endif
2109 	  break;
2110 	default:
2111 	  abort();
2112 	}
2113       if (uma == umb)
2114 	set_zc (1, 1);
2115       else
2116 	set_zc (0, ((int)uma - (int)umb) >= 0);
2117       break;
2118 
2119     case RXO_wait:
2120       PRIVILEDGED ();
2121       regs.r_psw |= FLAGBIT_I;
2122       DO_RETURN (RX_MAKE_STOPPED(0));
2123 
2124     case RXO_xchg:
2125 #ifdef CYCLE_ACCURATE
2126       regs.m2m = 0;
2127 #endif
2128       v = GS (); /* This is the memory operand, if any.  */
2129       PS (GD ()); /* and this may change the address register.  */
2130       PD (v);
2131       E2;
2132 #ifdef CYCLE_ACCURATE
2133       /* all M cycles happen during xchg's cycles.  */
2134       memory_dest = 0;
2135       memory_source = 0;
2136 #endif
2137       break;
2138 
2139     case RXO_xor:
2140       LOGIC_OP (^);
2141       break;
2142 
2143     default:
2144       EXCEPTION (EX_UNDEFINED);
2145     }
2146 
2147 #ifdef CYCLE_ACCURATE
2148   regs.m2m = 0;
2149   if (memory_source)
2150     regs.m2m |= M2M_SRC;
2151   if (memory_dest)
2152     regs.m2m |= M2M_DST;
2153 
2154   regs.rt = new_rt;
2155   new_rt = -1;
2156 #endif
2157 
2158 #ifdef CYCLE_STATS
2159   if (prev_cycle_count == regs.cycle_count)
2160     {
2161       printf("Cycle count not updated! id %s\n", id_names[opcode->id]);
2162       abort ();
2163     }
2164 #endif
2165 
2166 #ifdef CYCLE_STATS
2167   if (running_benchmark)
2168     {
2169       int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);
2170 
2171 
2172       cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
2173       times_per_id[opcode->id][omap] ++;
2174 
2175       times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
2176 
2177       prev_opcode_id = opcode->id;
2178       po0 = omap;
2179     }
2180 #endif
2181 
2182   return RX_MAKE_STEPPED ();
2183 }
2184 
2185 #ifdef CYCLE_STATS
2186 void
2187 reset_pipeline_stats (void)
2188 {
2189   memset (cycles_per_id, 0, sizeof(cycles_per_id));
2190   memset (times_per_id, 0, sizeof(times_per_id));
2191   memory_stalls = 0;
2192   register_stalls = 0;
2193   branch_stalls = 0;
2194   branch_alignment_stalls = 0;
2195   fast_returns = 0;
2196   memset (times_per_pair, 0, sizeof(times_per_pair));
2197   running_benchmark = 1;
2198 
2199   benchmark_start_cycle = regs.cycle_count;
2200 }
2201 
2202 void
2203 halt_pipeline_stats (void)
2204 {
2205   running_benchmark = 0;
2206   benchmark_end_cycle = regs.cycle_count;
2207 }
2208 #endif
2209 
2210 void
2211 pipeline_stats (void)
2212 {
2213 #ifdef CYCLE_STATS
2214   int i, o1;
2215   int p, p1;
2216 #endif
2217 
2218 #ifdef CYCLE_ACCURATE
2219   if (verbose == 1)
2220     {
2221       printf ("cycles: %llu\n", regs.cycle_count);
2222       return;
2223     }
2224 
2225   printf ("cycles: %13s\n", comma (regs.cycle_count));
2226 #endif
2227 
2228 #ifdef CYCLE_STATS
2229   if (benchmark_start_cycle)
2230     printf ("bmark:  %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));
2231 
2232   printf("\n");
2233   for (i = 0; i < N_RXO; i++)
2234     for (o1 = 0; o1 < N_MAP; o1 ++)
2235       if (times_per_id[i][o1])
2236 	printf("%13s %13s %7.2f  %s %s\n",
2237 	       comma (cycles_per_id[i][o1]),
2238 	       comma (times_per_id[i][o1]),
2239 	       (double)cycles_per_id[i][o1] / times_per_id[i][o1],
2240 	       op_cache_string(o1),
2241 	       id_names[i]+4);
2242 
2243   printf("\n");
2244   for (p = 0; p < N_RXO; p ++)
2245     for (p1 = 0; p1 < N_MAP; p1 ++)
2246       for (i = 0; i < N_RXO; i ++)
2247 	for (o1 = 0; o1 < N_MAP; o1 ++)
2248 	  if (times_per_pair[p][p1][i][o1])
2249 	    {
2250 	      printf("%13s   %s %-9s  ->  %s %s\n",
2251 		     comma (times_per_pair[p][p1][i][o1]),
2252 		     op_cache_string(p1),
2253 		     id_names[p]+4,
2254 		     op_cache_string(o1),
2255 		     id_names[i]+4);
2256 	    }
2257 
2258   printf("\n");
2259   printf("%13s memory stalls\n", comma (memory_stalls));
2260   printf("%13s register stalls\n", comma (register_stalls));
2261   printf("%13s branches taken (non-return)\n", comma (branch_stalls));
2262   printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
2263   printf("%13s fast returns\n", comma (fast_returns));
2264 #endif
2265 }
2266