xref: /netbsd-src/external/gpl3/gdb/dist/sim/rx/rx.c (revision cc576e1d8e4f4078fd4e81238abca9fca216f6ec)
1 /* rx.c --- opcode semantics for stand-alone RX simulator.
2 
3 Copyright (C) 2008-2016 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
5 
6 This file is part of the GNU simulators.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <signal.h>
26 
27 #include "opcode/rx.h"
28 #include "cpu.h"
29 #include "mem.h"
30 #include "syscalls.h"
31 #include "fpu.h"
32 #include "err.h"
33 #include "misc.h"
34 
35 #ifdef CYCLE_STATS
36 static const char * id_names[] = {
37   "RXO_unknown",
38   "RXO_mov",	/* d = s (signed) */
39   "RXO_movbi",	/* d = [s,s2] (signed) */
40   "RXO_movbir",	/* [s,s2] = d (signed) */
41   "RXO_pushm",	/* s..s2 */
42   "RXO_popm",	/* s..s2 */
43   "RXO_xchg",	/* s <-> d */
44   "RXO_stcc",	/* d = s if cond(s2) */
45   "RXO_rtsd",	/* rtsd, 1=imm, 2-0 = reg if reg type */
46 
47   /* These are all either d OP= s or, if s2 is set, d = s OP s2.  Note
48      that d may be "None".  */
49   "RXO_and",
50   "RXO_or",
51   "RXO_xor",
52   "RXO_add",
53   "RXO_sub",
54   "RXO_mul",
55   "RXO_div",
56   "RXO_divu",
57   "RXO_shll",
58   "RXO_shar",
59   "RXO_shlr",
60 
61   "RXO_adc",	/* d = d + s + carry */
62   "RXO_sbb",	/* d = d - s - ~carry */
63   "RXO_abs",	/* d = |s| */
64   "RXO_max",	/* d = max(d,s) */
65   "RXO_min",	/* d = min(d,s) */
66   "RXO_emul",	/* d:64 = d:32 * s */
67   "RXO_emulu",	/* d:64 = d:32 * s (unsigned) */
68 
69   "RXO_rolc",	/* d <<= 1 through carry */
70   "RXO_rorc",	/* d >>= 1 through carry*/
71   "RXO_rotl",	/* d <<= #s without carry */
72   "RXO_rotr",	/* d >>= #s without carry*/
73   "RXO_revw",	/* d = revw(s) */
74   "RXO_revl",	/* d = revl(s) */
75   "RXO_branch",	/* pc = d if cond(s) */
76   "RXO_branchrel",/* pc += d if cond(s) */
77   "RXO_jsr",	/* pc = d */
78   "RXO_jsrrel",	/* pc += d */
79   "RXO_rts",
80   "RXO_nop",
81   "RXO_nop2",
82   "RXO_nop3",
83   "RXO_nop4",
84   "RXO_nop5",
85   "RXO_nop6",
86   "RXO_nop7",
87 
88   "RXO_scmpu",
89   "RXO_smovu",
90   "RXO_smovb",
91   "RXO_suntil",
92   "RXO_swhile",
93   "RXO_smovf",
94   "RXO_sstr",
95 
96   "RXO_rmpa",
97   "RXO_mulhi",
98   "RXO_mullo",
99   "RXO_machi",
100   "RXO_maclo",
101   "RXO_mvtachi",
102   "RXO_mvtaclo",
103   "RXO_mvfachi",
104   "RXO_mvfacmi",
105   "RXO_mvfaclo",
106   "RXO_racw",
107 
108   "RXO_sat",	/* sat(d) */
109   "RXO_satr",
110 
111   "RXO_fadd",	/* d op= s */
112   "RXO_fcmp",
113   "RXO_fsub",
114   "RXO_ftoi",
115   "RXO_fmul",
116   "RXO_fdiv",
117   "RXO_round",
118   "RXO_itof",
119 
120   "RXO_bset",	/* d |= (1<<s) */
121   "RXO_bclr",	/* d &= ~(1<<s) */
122   "RXO_btst",	/* s & (1<<s2) */
123   "RXO_bnot",	/* d ^= (1<<s) */
124   "RXO_bmcc",	/* d<s> = cond(s2) */
125 
126   "RXO_clrpsw",	/* flag index in d */
127   "RXO_setpsw",	/* flag index in d */
128   "RXO_mvtipl",	/* new IPL in s */
129 
130   "RXO_rtfi",
131   "RXO_rte",
132   "RXO_rtd",	/* undocumented */
133   "RXO_brk",
134   "RXO_dbt",	/* undocumented */
135   "RXO_int",	/* vector id in s */
136   "RXO_stop",
137   "RXO_wait",
138 
139   "RXO_sccnd",	/* d = cond(s) ? 1 : 0 */
140 };
141 
142 static const char * optype_names[] = {
143   " -  ",
144   "#Imm",	/* #addend */
145   " Rn ",	/* Rn */
146   "[Rn]",	/* [Rn + addend] */
147   "Ps++",	/* [Rn+] */
148   "--Pr",	/* [-Rn] */
149   " cc ",	/* eq, gtu, etc */
150   "Flag",	/* [UIOSZC] */
151   "RbRi"	/* [Rb + scale * Ri] */
152 };
153 
154 #define N_RXO (sizeof(id_names)/sizeof(id_names[0]))
155 #define N_RXT (sizeof(optype_names)/sizeof(optype_names[0]))
156 #define N_MAP 90
157 
158 static unsigned long long benchmark_start_cycle;
159 static unsigned long long benchmark_end_cycle;
160 
161 static int op_cache[N_RXT][N_RXT][N_RXT];
162 static int op_cache_rev[N_MAP];
163 static int op_cache_idx = 0;
164 
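/* Map a triple of operand types to a small dense index so the
   per-opcode statistics arrays stay a manageable size.  The reverse
   map packs the triple as (a<<8)|(b<<4)|c, which op_cache_string()
   below unpacks again for display; the table is sized by N_MAP and
   the lookup exits if it overflows.  */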
165 static int
166 op_lookup (int a, int b, int c)
167 {
168   if (op_cache[a][b][c])
169     return op_cache[a][b][c];
170   op_cache_idx ++;
171   if (op_cache_idx >= N_MAP)
172     {
173       printf("op_cache_idx exceeds %d\n", N_MAP);
174       exit(1);
175     }
176   op_cache[a][b][c] = op_cache_idx;
177   op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
178   return op_cache_idx;
179 }
180 
181 static char *
182 op_cache_string (int map)
183 {
184   static int ci;
185   static char cb[5][20];
186   int a, b, c;
187 
188   map = op_cache_rev[map];
189   a = (map >> 8) & 15;
190   b = (map >> 4) & 15;
191   c = (map >> 0) & 15;
192   ci = (ci + 1) % 5;
193   sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]);
194   return cb[ci];
195 }
196 
197 static unsigned long long cycles_per_id[N_RXO][N_MAP];
198 static unsigned long long times_per_id[N_RXO][N_MAP];
199 static unsigned long long memory_stalls;
200 static unsigned long long register_stalls;
201 static unsigned long long branch_stalls;
202 static unsigned long long branch_alignment_stalls;
203 static unsigned long long fast_returns;
204 
205 static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
206 static int prev_opcode_id = RXO_unknown;
207 static int po0;
208 
209 #define STATS(x) x
210 
211 #else
212 #define STATS(x)
213 #endif /* CYCLE_STATS */
214 
215 
216 #ifdef CYCLE_ACCURATE
217 
218 static int new_rt = -1;
219 
220 /* Number of cycles to add if an insn spans an 8-byte boundary.  */
221 static int branch_alignment_penalty = 0;
222 
223 #endif
224 
225 static int running_benchmark = 1;
226 
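/* Note: this expands to an un-braced `if', so tprintf calls must
   stand alone; a following `else' would otherwise bind to the
   macro's `if'.  */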
227 #define tprintf if (trace && running_benchmark) printf
228 
229 jmp_buf decode_jmp_buf;
230 unsigned int rx_cycles = 0;
231 
232 #ifdef CYCLE_ACCURATE
233 /* If nonzero, memory was read at some point and cycle latency might
234    take effect.  */
235 static int memory_source = 0;
236 /* If nonzero, memory was written and extra cycles might be
237    needed.  */
238 static int memory_dest = 0;
239 
240 static void
241 cycles (int throughput)
242 {
243   tprintf("%d cycles\n", throughput);
244   regs.cycle_count += throughput;
245 }
246 
247 /* Number of execution (E) cycles the op uses.  For memory sources, we
248    include the load micro-op stall as two extra E cycles.  */
249 #define E(c) cycles (memory_source ? c + 2 : c)
250 #define E1 cycles (1)
251 #define E2 cycles (2)
252 #define EBIT cycles (memory_source ? 2 : 1)
253 
254 /* Check to see if a read latency must be applied for a given register.  */
255 #define RL(r) \
256   if (regs.rt == r )							\
257     {									\
258       tprintf("register %d load stall\n", r);				\
259       regs.cycle_count ++;						\
260       STATS(register_stalls ++);					\
261       regs.rt = -1;							\
262     }
263 
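/* Record that register R was just loaded from memory; the next insn
   that reads it through RL() above pays a one-cycle load-use stall.  */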
264 #define RLD(r)					\
265   if (memory_source)				\
266     {						\
267       tprintf ("Rt now %d\n", r);		\
268       new_rt = r;				\
269     }
270 
271 static int
272 lsb_count (unsigned long v, int is_signed)
273 {
274   int i, lsb;
275   if (is_signed && (v & 0x80000000U))
276     v = (unsigned long)(long)(-v);
277   for (i=31; i>=0; i--)
278     if (v & (1 << i))
279       {
280 	/* i is 0..31; counting bits from 1, the result maps bits 1-2 to 1, 3-4 to 2, 5-6 to 3, etc. */
281 	lsb = (i + 2) / 2;
282 	return lsb;
283       }
284   return 0;
285 }
286 
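/* Roughly model the data-dependent timing of the divider: the cycle
   count grows with how many more significant bits the numerator has
   than the denominator.  */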
287 static int
288 divu_cycles(unsigned long num, unsigned long den)
289 {
290   int nb = lsb_count (num, 0);
291   int db = lsb_count (den, 0);
292   int rv;
293 
294   if (nb < db)
295     rv = 2;
296   else
297     rv = 3 + nb - db;
298   E (rv);
299   return rv;
300 }
301 
302 static int
303 div_cycles(long num, long den)
304 {
305   int nb = lsb_count ((unsigned long)num, 1);
306   int db = lsb_count ((unsigned long)den, 1);
307   int rv;
308 
309   if (nb < db)
310     rv = 3;
311   else
312     rv = 5 + nb - db;
313   E (rv);
314   return rv;
315 }
316 
317 #else /* !CYCLE_ACCURATE */
318 
319 #define cycles(t)
320 #define E(c)
321 #define E1
322 #define E2
323 #define EBIT
324 #define RL(r)
325 #define RLD(r)
326 
327 #define divu_cycles(n,d)
328 #define div_cycles(n,d)
329 
330 #endif /* else CYCLE_ACCURATE */
331 
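/* Operand width in bytes, indexed by the RX_* operand size codes
   used in the switches below.  */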
332 static int size2bytes[] = {
333   4, 1, 1, 1, 2, 2, 2, 3, 4
334 };
335 
336 typedef struct {
337   unsigned long dpc;
338 } RX_Data;
339 
340 #define rx_abort() _rx_abort(__FILE__, __LINE__)
341 static void
342 _rx_abort (const char *file, int line)
343 {
344   if (strrchr (file, '/'))
345     file = strrchr (file, '/') + 1;
346   fprintf(stderr, "abort at %s:%d\n", file, line);
347   abort();
348 }
349 
350 static unsigned char *get_byte_base;
351 static RX_Opcode_Decoded **decode_cache_base;
352 static SI get_byte_page;
353 
354 void
355 reset_decoder (void)
356 {
357   get_byte_base = 0;
358   decode_cache_base = 0;
359   get_byte_page = 0;
360 }
361 
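/* Cache host pointers for the page containing TPC so the hot paths
   in rx_get_byte() and decode_opcode() reduce to a simple array
   index rather than a full address translation per byte.  */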
362 static inline void
363 maybe_get_mem_page (SI tpc)
364 {
365   if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting)
366     {
367       get_byte_page = tpc & NONPAGE_MASK;
368       get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
369       decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;
370     }
371 }
372 
373 /* This gets called a *lot* so optimize it.  */
374 static int
375 rx_get_byte (void *vdata)
376 {
377   RX_Data *rx_data = (RX_Data *)vdata;
378   SI tpc = rx_data->dpc;
379 
380   /* See load.c for an explanation of this.  */
381   if (rx_big_endian)
382     tpc ^= 3;
383 
384   maybe_get_mem_page (tpc);
385 
386   rx_data->dpc ++;
387   return get_byte_base [tpc];
388 }
389 
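/* Fetch the value of operand I of the decoded insn RD, applying any
   pre/post register adjustment, memory access and sign/zero
   extension its addressing mode and size call for.  */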
390 static int
391 get_op (const RX_Opcode_Decoded *rd, int i)
392 {
393   const RX_Opcode_Operand *o = rd->op + i;
394   int addr, rv = 0;
395 
396   switch (o->type)
397     {
398     case RX_Operand_None:
399       rx_abort ();
400 
401     case RX_Operand_Immediate:	/* #addend */
402       return o->addend;
403 
404     case RX_Operand_Register:	/* Rn */
405       RL (o->reg);
406       rv = get_reg (o->reg);
407       break;
408 
409     case RX_Operand_Predec:	/* [-Rn] */
410       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
411       /* fall through */
412     case RX_Operand_Postinc:	/* [Rn+] */
413     case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
414     case RX_Operand_Indirect:	/* [Rn + addend] */
415     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
416 #ifdef CYCLE_ACCURATE
417       RL (o->reg);
418       if (o->type == RX_Operand_TwoReg)
419 	RL (rd->op[2].reg);
420       regs.rt = -1;
421       if (regs.m2m == M2M_BOTH)
422 	{
423 	  tprintf("src memory stall\n");
424 #ifdef CYCLE_STATS
425 	  memory_stalls ++;
426 #endif
427 	  regs.cycle_count ++;
428 	  regs.m2m = 0;
429 	}
430 
431       memory_source = 1;
432 #endif
433 
434       if (o->type == RX_Operand_TwoReg)
435 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
436       else
437 	addr = get_reg (o->reg) + o->addend;
438 
439       switch (o->size)
440 	{
441 	default:
442 	case RX_AnySize:
443 	  rx_abort ();
444 
445 	case RX_Byte: /* undefined extension */
446 	case RX_UByte:
447 	case RX_SByte:
448 	  rv = mem_get_qi (addr);
449 	  break;
450 
451 	case RX_Word: /* undefined extension */
452 	case RX_UWord:
453 	case RX_SWord:
454 	  rv = mem_get_hi (addr);
455 	  break;
456 
457 	case RX_3Byte:
458 	  rv = mem_get_psi (addr);
459 	  break;
460 
461 	case RX_Long:
462 	  rv = mem_get_si (addr);
463 	  break;
464 	}
465 
466       if (o->type == RX_Operand_Postinc)
467 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
468 
469       break;
470 
471     case RX_Operand_Condition:	/* eq, gtu, etc */
472       return condition_true (o->reg);
473 
474     case RX_Operand_Flag:	/* [UIOSZC] */
475       return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
476     }
477 
478   /* if we've gotten here, we need to clip/extend the value according
479      to the size.  */
480   switch (o->size)
481     {
482     default:
483     case RX_AnySize:
484       rx_abort ();
485 
486     case RX_Byte: /* undefined extension */
487       rv |= 0xdeadbe00; /* keep them honest */
488       break;
489 
490     case RX_UByte:
491       rv &= 0xff;
492       break;
493 
494     case RX_SByte:
495       rv = sign_ext (rv, 8);
496       break;
497 
498     case RX_Word: /* undefined extension */
499       rv |= 0xdead0000; /* keep them honest */
500       break;
501 
502     case RX_UWord:
503       rv &=  0xffff;
504       break;
505 
506     case RX_SWord:
507       rv = sign_ext (rv, 16);
508       break;
509 
510     case RX_3Byte:
511       rv &= 0xffffff;
512       break;
513 
514     case RX_Long:
515       break;
516     }
517   return rv;
518 }
519 
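/* Store V into operand I of the decoded insn RD, clipping it to the
   operand size and performing whatever register, memory or flag
   update the addressing mode calls for.  */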
520 static void
521 put_op (const RX_Opcode_Decoded *rd, int i, int v)
522 {
523   const RX_Opcode_Operand *o = rd->op + i;
524   int addr;
525 
526   switch (o->size)
527     {
528     default:
529     case RX_AnySize:
530       if (o->type != RX_Operand_Register)
531 	rx_abort ();
532       break;
533 
534     case RX_Byte: /* undefined extension */
535       v |= 0xdeadbe00; /* keep them honest */
536       break;
537 
538     case RX_UByte:
539       v &= 0xff;
540       break;
541 
542     case RX_SByte:
543       v = sign_ext (v, 8);
544       break;
545 
546     case RX_Word: /* undefined extension */
547       v |= 0xdead0000; /* keep them honest */
548       break;
549 
550     case RX_UWord:
551       v &=  0xffff;
552       break;
553 
554     case RX_SWord:
555       v = sign_ext (v, 16);
556       break;
557 
558     case RX_3Byte:
559       v &= 0xffffff;
560       break;
561 
562     case RX_Long:
563       break;
564     }
565 
566   switch (o->type)
567     {
568     case RX_Operand_None:
569       /* Opcodes like TST and CMP use this.  */
570       break;
571 
572     case RX_Operand_Immediate:	/* #addend */
573     case RX_Operand_Condition:	/* eq, gtu, etc */
574       rx_abort ();
575 
576     case RX_Operand_Register:	/* Rn */
577       put_reg (o->reg, v);
578       RLD (o->reg);
579       break;
580 
581     case RX_Operand_Predec:	/* [-Rn] */
582       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
583       /* fall through */
584     case RX_Operand_Postinc:	/* [Rn+] */
585     case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
586     case RX_Operand_Indirect:	/* [Rn + addend] */
587     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
588 
589 #ifdef CYCLE_ACCURATE
590       if (regs.m2m == M2M_BOTH)
591 	{
592 	  tprintf("dst memory stall\n");
593 	  regs.cycle_count ++;
594 #ifdef CYCLE_STATS
595 	  memory_stalls ++;
596 #endif
597 	  regs.m2m = 0;
598 	}
599       memory_dest = 1;
600 #endif
601 
602       if (o->type == RX_Operand_TwoReg)
603 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
604       else
605 	addr = get_reg (o->reg) + o->addend;
606 
607       switch (o->size)
608 	{
609 	default:
610 	case RX_AnySize:
611 	  rx_abort ();
612 
613 	case RX_Byte: /* undefined extension */
614 	case RX_UByte:
615 	case RX_SByte:
616 	  mem_put_qi (addr, v);
617 	  break;
618 
619 	case RX_Word: /* undefined extension */
620 	case RX_UWord:
621 	case RX_SWord:
622 	  mem_put_hi (addr, v);
623 	  break;
624 
625 	case RX_3Byte:
626 	  mem_put_psi (addr, v);
627 	  break;
628 
629 	case RX_Long:
630 	  mem_put_si (addr, v);
631 	  break;
632 	}
633 
634       if (o->type == RX_Operand_Postinc)
635 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
636 
637       break;
638 
639     case RX_Operand_Flag:	/* [UIOSZC] */
640       if (v)
641 	regs.r_psw |= (1 << o->reg);
642       else
643 	regs.r_psw &= ~(1 << o->reg);
644       break;
645     }
646 }
647 
648 #define PD(x) put_op (opcode, 0, x)
649 #define PS(x) put_op (opcode, 1, x)
650 #define PS2(x) put_op (opcode, 2, x)
651 #define GD() get_op (opcode, 0)
652 #define GS() get_op (opcode, 1)
653 #define GS2() get_op (opcode, 2)
654 #define DSZ() size2bytes[opcode->op[0].size]
655 #define SSZ() size2bytes[opcode->op[0].size]
656 #define S2SZ() size2bytes[opcode->op[0].size]
657 
658 /* "Universal" sources.  */
659 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
660 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
661 
662 static void
663 push(int val)
664 {
665   int rsp = get_reg (sp);
666   rsp -= 4;
667   put_reg (sp, rsp);
668   mem_put_si (rsp, val);
669 }
670 
671 /* Just like the above, but tag the memory as "pushed pc" so if anyone
672    tries to write to it, it will cause an error.  */
673 static void
674 pushpc(int val)
675 {
676   int rsp = get_reg (sp);
677   rsp -= 4;
678   put_reg (sp, rsp);
679   mem_put_si (rsp, val);
680   mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);
681 }
682 
683 static int
684 pop()
685 {
686   int rv;
687   int rsp = get_reg (sp);
688   rv = mem_get_si (rsp);
689   rsp += 4;
690   put_reg (sp, rsp);
691   return rv;
692 }
693 
694 static int
695 poppc()
696 {
697   int rv;
698   int rsp = get_reg (sp);
699   if (mem_get_content_type (rsp) != MC_PUSHED_PC)
700     execution_error (SIM_ERR_CORRUPT_STACK, rsp);
701   rv = mem_get_si (rsp);
702   mem_set_content_range (rsp, rsp+3, MC_UNINIT);
703   rsp += 4;
704   put_reg (sp, rsp);
705   return rv;
706 }
707 
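/* Arithmetic ops are evaluated twice: unsigned in 64 bits to recover
   the carry/borrow out, and signed (with both operands sign-extended
   to the destination size) so set_oszc() can detect overflow.  */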
708 #define MATH_OP(vop,c)				\
709 { \
710   umb = US2(); \
711   uma = US1(); \
712   ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
713   tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
714   ma = sign_ext (uma, DSZ() * 8);					\
715   mb = sign_ext (umb, DSZ() * 8);					\
716   sll = (long long) ma vop (long long) mb vop c; \
717   tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
718   set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
719   PD (sll); \
720   E (1);    \
721 }
722 
723 #define LOGIC_OP(vop) \
724 { \
725   mb = US2(); \
726   ma = US1(); \
727   v = ma vop mb; \
728   tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
729   set_sz (v, DSZ()); \
730   PD(v); \
731   E (1); \
732 }
733 
734 #define SHIFT_OP(val, type, count, OP, carry_mask)	\
735 { \
736   int i, c=0; \
737   count = US2(); \
738   val = (type)US1();				\
739   tprintf("%lld " #OP " %d\n", val, count); \
740   for (i = 0; i < count; i ++) \
741     { \
742       c = val & carry_mask; \
743       val OP 1; \
744     } \
745   set_oszc (val, 4, c); \
746   PD (val); \
747 }
748 
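/* The fp_t values handled here are raw 32-bit bit patterns; these
   helpers reinterpret them as host floats, which is only needed for
   trace output.  */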
749 typedef union {
750   int i;
751   float f;
752 } FloatInt;
753 
754 static inline int
755 float2int (float f)
756 {
757   FloatInt fi;
758   fi.f = f;
759   return fi.i;
760 }
761 
762 static inline float
763 int2float (int i)
764 {
765   FloatInt fi;
766   fi.i = i;
767   return fi.f;
768 }
769 
770 static int
771 fop_fadd (fp_t s1, fp_t s2, fp_t *d)
772 {
773   *d = rxfp_add (s1, s2);
774   return 1;
775 }
776 
777 static int
778 fop_fmul (fp_t s1, fp_t s2, fp_t *d)
779 {
780   *d = rxfp_mul (s1, s2);
781   return 1;
782 }
783 
784 static int
785 fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
786 {
787   *d = rxfp_div (s1, s2);
788   return 1;
789 }
790 
791 static int
792 fop_fsub (fp_t s1, fp_t s2, fp_t *d)
793 {
794   *d = rxfp_sub (s1, s2);
795   return 1;
796 }
797 
798 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
799 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
800 #define FPCHECK() \
801   if (FPPENDING()) \
802     return do_fp_exception (opcode_pc)
803 
804 #define FLOAT_OP(func) \
805 { \
806   int do_store;   \
807   fp_t fa, fb, fc; \
808   FPCLEAR(); \
809   fb = GS (); \
810   fa = GD (); \
811   do_store = fop_##func (fa, fb, &fc); \
812   tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
813   FPCHECK(); \
814   if (do_store) \
815     PD (fc);	\
816   mb = 0; \
817   if ((fc & 0x80000000UL) != 0) \
818     mb |= FLAGBIT_S; \
819   if ((fc & 0x7fffffffUL) == 0)			\
820     mb |= FLAGBIT_Z; \
821   set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
822 }
823 
824 #define carry (FLAG_C ? 1 : 0)
825 
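/* Fixed exception vector addresses near the top of the address
   space, with the host signal used to report each exception when
   running under GDB.  */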
826 static struct {
827   unsigned long vaddr;
828   const char *str;
829   int signal;
830 } exception_info[] = {
831   { 0xFFFFFFD0UL, "privileged opcode", SIGILL },
832   { 0xFFFFFFD4UL, "access violation", SIGSEGV },
833   { 0xFFFFFFDCUL, "undefined opcode", SIGILL },
834   { 0xFFFFFFE4UL, "floating point", SIGFPE }
835 };
836 #define EX_PRIVILEDGED	0
837 #define EX_ACCESS	1
838 #define EX_UNDEFINED	2
839 #define EX_FLOATING	3
840 #define EXCEPTION(n)  \
841   return generate_exception (n, opcode_pc)
842 
843 #define PRIVILEDGED() \
844   if (FLAG_PM) \
845     EXCEPTION (EX_PRIVILEDGED)
846 
847 static int
848 generate_exception (unsigned long type, SI opcode_pc)
849 {
850   SI old_psw, old_pc, new_pc;
851 
852   new_pc = mem_get_si (exception_info[type].vaddr);
853   /* 0x00020000 is the value used to initialise the known
854      exception vectors (see rx.ld).  It lies in a reserved
855      area of memory, so do not try to access it; if the
856      value has not been changed by the program then the
857      vector has not been installed.  */
858   if (new_pc == 0 || new_pc == 0x00020000)
859     {
860       if (rx_in_gdb)
861 	return RX_MAKE_STOPPED (exception_info[type].signal);
862 
863       fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
864 	      exception_info[type].str, (unsigned long) opcode_pc);
865       if (type == EX_FLOATING)
866 	{
867 	  int mask = FPPENDING ();
868 	  fprintf (stderr, "Pending FP exceptions:");
869 	  if (mask & FPSWBITS_FV)
870 	    fprintf(stderr, " Invalid");
871 	  if (mask & FPSWBITS_FO)
872 	    fprintf(stderr, " Overflow");
873 	  if (mask & FPSWBITS_FZ)
874 	    fprintf(stderr, " Division-by-zero");
875 	  if (mask & FPSWBITS_FU)
876 	    fprintf(stderr, " Underflow");
877 	  if (mask & FPSWBITS_FX)
878 	    fprintf(stderr, " Inexact");
879 	  if (mask & FPSWBITS_CE)
880 	    fprintf(stderr, " Unimplemented");
881 	  fprintf(stderr, "\n");
882 	}
883       return RX_MAKE_EXITED (1);
884     }
885 
886   tprintf ("Triggering %s exception\n", exception_info[type].str);
887 
888   old_psw = regs.r_psw;
889   regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
890   old_pc = opcode_pc;
891   regs.r_pc = new_pc;
892   pushpc (old_psw);
893   pushpc (old_pc);
894   return RX_MAKE_STEPPED ();
895 }
896 
897 void
898 generate_access_exception (void)
899 {
900   int rv;
901 
902   rv = generate_exception (EX_ACCESS, regs.r_pc);
903   if (RX_EXITED (rv))
904     longjmp (decode_jmp_buf, rv);
905 }
906 
907 static int
908 do_fp_exception (unsigned long opcode_pc)
909 {
910   while (FPPENDING())
911     EXCEPTION (EX_FLOATING);
912   return RX_MAKE_STEPPED ();
913 }
914 
915 static int
916 op_is_memory (const RX_Opcode_Decoded *rd, int i)
917 {
918   switch (rd->op[i].type)
919     {
920     case RX_Operand_Predec:
921     case RX_Operand_Postinc:
922     case RX_Operand_Indirect:
923       return 1;
924     default:
925       return 0;
926     }
927 }
928 #define OM(i) op_is_memory (opcode, i)
929 
930 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
931 
932 int
933 decode_opcode ()
934 {
935   unsigned int uma=0, umb=0;
936   int ma=0, mb=0;
937   int opcode_size, v;
938   unsigned long long ll;
939   long long sll;
940   unsigned long opcode_pc;
941   RX_Data rx_data;
942   const RX_Opcode_Decoded *opcode;
943 #ifdef CYCLE_STATS
944   unsigned long long prev_cycle_count;
945 #endif
946 #ifdef CYCLE_ACCURATE
947   unsigned int tx;
948 #endif
949 
950 #ifdef CYCLE_STATS
951   prev_cycle_count = regs.cycle_count;
952 #endif
953 
954 #ifdef CYCLE_ACCURATE
955   memory_source = 0;
956   memory_dest = 0;
957 #endif
958 
959   rx_cycles ++;
960 
961   maybe_get_mem_page (regs.r_pc);
962 
963   opcode_pc = regs.r_pc;
964 
965   /* Note that we don't word-swap at this point; there's no need to.  */
966   if (decode_cache_base[opcode_pc] == NULL)
967     {
968       RX_Opcode_Decoded *opcode_w;
969       rx_data.dpc = opcode_pc;
970       opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
971       opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
972 				      rx_get_byte, &rx_data);
973       opcode = opcode_w;
974     }
975   else
976     {
977       opcode = decode_cache_base[opcode_pc];
978       opcode_size = opcode->n_bytes;
979     }
980 
981 #ifdef CYCLE_ACCURATE
982   if (branch_alignment_penalty)
983     {
984       if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
985 	{
986 	  tprintf("1 cycle branch alignment penalty\n");
987 	  cycles (branch_alignment_penalty);
988 #ifdef CYCLE_STATS
989 	  branch_alignment_stalls ++;
990 #endif
991 	}
992       branch_alignment_penalty = 0;
993     }
994 #endif
995 
996   regs.r_pc += opcode_size;
997 
998   rx_flagmask = opcode->flags_s;
999   rx_flagand = ~(int)opcode->flags_0;
1000   rx_flagor = opcode->flags_1;
1001 
1002   switch (opcode->id)
1003     {
1004     case RXO_abs:
1005       sll = GS ();
1006       tprintf("|%lld| = ", sll);
1007       if (sll < 0)
1008 	sll = -sll;
1009       tprintf("%lld\n", sll);
1010       PD (sll);
1011       set_osz (sll, 4);
1012       E (1);
1013       break;
1014 
1015     case RXO_adc:
1016       MATH_OP (+,carry);
1017       break;
1018 
1019     case RXO_add:
1020       MATH_OP (+,0);
1021       break;
1022 
1023     case RXO_and:
1024       LOGIC_OP (&);
1025       break;
1026 
1027     case RXO_bclr:
1028       ma = GD ();
1029       mb = GS ();
1030       if (opcode->op[0].type == RX_Operand_Register)
1031 	mb &= 0x1f;
1032       else
1033 	mb &= 0x07;
1034       ma &= ~(1 << mb);
1035       PD (ma);
1036       EBIT;
1037       break;
1038 
1039     case RXO_bmcc:
1040       ma = GD ();
1041       mb = GS ();
1042       if (opcode->op[0].type == RX_Operand_Register)
1043 	mb &= 0x1f;
1044       else
1045 	mb &= 0x07;
1046       if (GS2 ())
1047 	ma |= (1 << mb);
1048       else
1049 	ma &= ~(1 << mb);
1050       PD (ma);
1051       EBIT;
1052       break;
1053 
1054     case RXO_bnot:
1055       ma = GD ();
1056       mb = GS ();
1057       if (opcode->op[0].type == RX_Operand_Register)
1058 	mb &= 0x1f;
1059       else
1060 	mb &= 0x07;
1061       ma ^= (1 << mb);
1062       PD (ma);
1063       EBIT;
1064       break;
1065 
1066     case RXO_branch:
1067       if (opcode->op[1].type == RX_Operand_None || GS())
1068 	{
1069 #ifdef CYCLE_ACCURATE
1070 	  SI old_pc = regs.r_pc;
1071 	  int delta;
1072 #endif
1073 	  regs.r_pc = GD();
1074 #ifdef CYCLE_ACCURATE
1075 	  delta = regs.r_pc - old_pc;
1076 	  if (delta >= 0 && delta < 16
1077 	      && opcode_size > 1)
1078 	    {
1079 	      tprintf("near forward branch bonus\n");
1080 	      cycles (2);
1081 	    }
1082 	  else
1083 	    {
1084 	      cycles (3);
1085 	      branch_alignment_penalty = 1;
1086 	    }
1087 #ifdef CYCLE_STATS
1088 	  branch_stalls ++;
1089 #endif
1090 #endif
1091 	}
1092 #ifdef CYCLE_ACCURATE
1093       else
1094 	cycles (1);
1095 #endif
1096       break;
1097 
1098     case RXO_branchrel:
1099       if (opcode->op[1].type == RX_Operand_None || GS())
1100 	{
1101 	  int delta = GD();
1102 	  regs.r_pc = opcode_pc + delta;
1103 #ifdef CYCLE_ACCURATE
1104 	  /* Note: specs say 3, chip says 2.  */
1105 	  if (delta >= 0 && delta < 16
1106 	      && opcode_size > 1)
1107 	    {
1108 	      tprintf("near forward branch bonus\n");
1109 	      cycles (2);
1110 	    }
1111 	  else
1112 	    {
1113 	      cycles (3);
1114 	      branch_alignment_penalty = 1;
1115 	    }
1116 #ifdef CYCLE_STATS
1117 	  branch_stalls ++;
1118 #endif
1119 #endif
1120 	}
1121 #ifdef CYCLE_ACCURATE
1122       else
1123 	cycles (1);
1124 #endif
1125       break;
1126 
1127     case RXO_brk:
1128       {
1129 	int old_psw = regs.r_psw;
1130 	if (rx_in_gdb)
1131 	  DO_RETURN (RX_MAKE_HIT_BREAK ());
1132 	if (regs.r_intb == 0)
1133 	  {
1134 	    tprintf("BREAK hit, no vector table.\n");
1135 	    DO_RETURN (RX_MAKE_EXITED(1));
1136 	  }
1137 	regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1138 	pushpc (old_psw);
1139 	pushpc (regs.r_pc);
1140 	regs.r_pc = mem_get_si (regs.r_intb);
1141 	cycles(6);
1142       }
1143       break;
1144 
1145     case RXO_bset:
1146       ma = GD ();
1147       mb = GS ();
1148       if (opcode->op[0].type == RX_Operand_Register)
1149 	mb &= 0x1f;
1150       else
1151 	mb &= 0x07;
1152       ma |= (1 << mb);
1153       PD (ma);
1154       EBIT;
1155       break;
1156 
1157     case RXO_btst:
1158       ma = GS ();
1159       mb = GS2 ();
1160       if (opcode->op[1].type == RX_Operand_Register)
1161 	mb &= 0x1f;
1162       else
1163 	mb &= 0x07;
1164       umb = ma & (1 << mb);
1165       set_zc (! umb, umb);
1166       EBIT;
1167       break;
1168 
1169     case RXO_clrpsw:
1170       v = 1 << opcode->op[0].reg;
1171       if (FLAG_PM
1172 	  && (v == FLAGBIT_I
1173 	      || v == FLAGBIT_U))
1174 	break;
1175       regs.r_psw &= ~v;
1176       cycles (1);
1177       break;
1178 
1179     case RXO_div: /* d = d / s */
1180       ma = GS();
1181       mb = GD();
1182       tprintf("%d / %d = ", mb, ma);
1183       if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
1184 	{
1185 	  tprintf("#NAN\n");
1186 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1187 	  cycles (3);
1188 	}
1189       else
1190 	{
1191 	  v = mb/ma;
1192 	  tprintf("%d\n", v);
1193 	  set_flags (FLAGBIT_O, 0);
1194 	  PD (v);
1195 	  div_cycles (mb, ma);
1196 	}
1197       break;
1198 
1199     case RXO_divu: /* d = d / s */
1200       uma = GS();
1201       umb = GD();
1202       tprintf("%u / %u = ", umb, uma);
1203       if (uma == 0)
1204 	{
1205 	  tprintf("#NAN\n");
1206 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1207 	  cycles (2);
1208 	}
1209       else
1210 	{
1211 	  v = umb / uma;
1212 	  tprintf("%u\n", v);
1213 	  set_flags (FLAGBIT_O, 0);
1214 	  PD (v);
1215 	  divu_cycles (umb, uma);
1216 	}
1217       break;
1218 
1219     case RXO_emul:
1220       ma = GD ();
1221       mb = GS ();
1222       sll = (long long)ma * (long long)mb;
1223       tprintf("%d * %d = %lld\n", ma, mb, sll);
1224       put_reg (opcode->op[0].reg, sll);
1225       put_reg (opcode->op[0].reg + 1, sll >> 32);
1226       E2;
1227       break;
1228 
1229     case RXO_emulu:
1230       uma = GD ();
1231       umb = GS ();
1232       ll = (unsigned long long)uma * (unsigned long long)umb;
1233       tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
1234       put_reg (opcode->op[0].reg, ll);
1235       put_reg (opcode->op[0].reg + 1, ll >> 32);
1236       E2;
1237       break;
1238 
1239     case RXO_fadd:
1240       FLOAT_OP (fadd);
1241       E (4);
1242       break;
1243 
1244     case RXO_fcmp:
1245       ma = GD();
1246       mb = GS();
1247       FPCLEAR ();
1248       rxfp_cmp (ma, mb);
1249       FPCHECK ();
1250       E (1);
1251       break;
1252 
1253     case RXO_fdiv:
1254       FLOAT_OP (fdiv);
1255       E (16);
1256       break;
1257 
1258     case RXO_fmul:
1259       FLOAT_OP (fmul);
1260       E (3);
1261       break;
1262 
1263     case RXO_rtfi:
1264       PRIVILEDGED ();
1265       regs.r_psw = regs.r_bpsw;
1266       regs.r_pc = regs.r_bpc;
1267 #ifdef CYCLE_ACCURATE
1268       regs.fast_return = 0;
1269       cycles(3);
1270 #endif
1271       break;
1272 
1273     case RXO_fsub:
1274       FLOAT_OP (fsub);
1275       E (4);
1276       break;
1277 
1278     case RXO_ftoi:
1279       ma = GS ();
1280       FPCLEAR ();
1281       mb = rxfp_ftoi (ma, FPRM_ZERO);
1282       FPCHECK ();
1283       PD (mb);
1284       tprintf("(int) %g = %d\n", int2float(ma), mb);
1285       set_sz (mb, 4);
1286       E (2);
1287       break;
1288 
1289     case RXO_int:
1290       v = GS ();
1291       if (v == 255)
1292 	{
1293 	  int rc = rx_syscall (regs.r[5]);
1294 	  if (! RX_STEPPED (rc))
1295 	    DO_RETURN (rc);
1296 	}
1297       else
1298 	{
1299 	  int old_psw = regs.r_psw;
1300 	  regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1301 	  pushpc (old_psw);
1302 	  pushpc (regs.r_pc);
1303 	  regs.r_pc = mem_get_si (regs.r_intb + 4 * v);
1304 	}
1305       cycles (6);
1306       break;
1307 
1308     case RXO_itof:
1309       ma = GS ();
1310       FPCLEAR ();
1311       mb = rxfp_itof (ma, regs.r_fpsw);
1312       FPCHECK ();
1313       tprintf("(float) %d = %x\n", ma, mb);
1314       PD (mb);
1315       set_sz (ma, 4);
1316       E (2);
1317       break;
1318 
1319     case RXO_jsr:
1320     case RXO_jsrrel:
1321       {
1322 #ifdef CYCLE_ACCURATE
1323 	int delta;
1324 	regs.m2m = 0;
1325 #endif
1326 	v = GD ();
1327 #ifdef CYCLE_ACCURATE
1328 	regs.link_register = regs.r_pc;
1329 #endif
1330 	pushpc (get_reg (pc));
1331 	if (opcode->id == RXO_jsrrel)
1332 	  v += regs.r_pc;
1333 #ifdef CYCLE_ACCURATE
1334 	delta = v - regs.r_pc;
1335 #endif
1336 	put_reg (pc, v);
1337 #ifdef CYCLE_ACCURATE
1338 	/* Note: docs say 3, chip says 2 */
1339 	if (delta >= 0 && delta < 16)
1340 	  {
1341 	    tprintf ("near forward jsr bonus\n");
1342 	    cycles (2);
1343 	  }
1344 	else
1345 	  {
1346 	    branch_alignment_penalty = 1;
1347 	    cycles (3);
1348 	  }
1349 	regs.fast_return = 1;
1350 #endif
1351       }
1352       break;
1353 
1354     case RXO_machi:
1355       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1356       ll <<= 16;
1357       put_reg64 (acc64, ll + regs.r_acc);
1358       E1;
1359       break;
1360 
1361     case RXO_maclo:
1362       ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1363       ll <<= 16;
1364       put_reg64 (acc64, ll + regs.r_acc);
1365       E1;
1366       break;
1367 
1368     case RXO_max:
1369       mb = GS();
1370       ma = GD();
1371       if (ma > mb)
1372 	PD (ma);
1373       else
1374 	PD (mb);
1375       E (1);
1376       break;
1377 
1378     case RXO_min:
1379       mb = GS();
1380       ma = GD();
1381       if (ma < mb)
1382 	PD (ma);
1383       else
1384 	PD (mb);
1385       E (1);
1386       break;
1387 
1388     case RXO_mov:
1389       v = GS ();
1390 
1391       if (opcode->op[1].type == RX_Operand_Register
1392 	  && opcode->op[1].reg == 17 /* PC */)
1393 	{
1394 	  /* Special case.  We want the address of the insn, not the
1395 	     address of the next insn.  */
1396 	  v = opcode_pc;
1397 	}
1398 
1399       if (opcode->op[0].type == RX_Operand_Register
1400 	  && opcode->op[0].reg == 16 /* PSW */)
1401 	{
1402 	  /* Special case, LDC and POPC can't ever modify PM.  */
1403 	  int pm = regs.r_psw & FLAGBIT_PM;
1404 	  v &= ~ FLAGBIT_PM;
1405 	  v |= pm;
1406 	  if (pm)
1407 	    {
1408 	      v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1409 	      v |= pm;
1410 	    }
1411 	}
1412       if (FLAG_PM)
1413 	{
1414 	  /* various things can't be changed in user mode.  */
1415 	  if (opcode->op[0].type == RX_Operand_Register)
1416 	    if (opcode->op[0].reg == 32)
1417 	      {
1418 		v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1419 		v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1420 	      }
1421 	  if (opcode->op[0].reg == 34 /* ISP */
1422 	      || opcode->op[0].reg == 37 /* BPSW */
1423 	      || opcode->op[0].reg == 39 /* INTB */
1424 	      || opcode->op[0].reg == 38 /* VCT */)
1425 	    /* These are ignored.  */
1426 	    break;
1427 	}
1428       if (OM(0) && OM(1))
1429 	cycles (2);
1430       else
1431 	cycles (1);
1432 
1433       PD (v);
1434 
1435 #ifdef CYCLE_ACCURATE
1436       if ((opcode->op[0].type == RX_Operand_Predec
1437 	   && opcode->op[1].type == RX_Operand_Register)
1438 	  || (opcode->op[0].type == RX_Operand_Postinc
1439 	      && opcode->op[1].type == RX_Operand_Register))
1440 	{
1441 	  /* Special case: push reg doesn't cause a memory stall.  */
1442 	  memory_dest = 0;
1443 	  tprintf("push special case\n");
1444 	}
1445 #endif
1446 
1447       set_sz (v, DSZ());
1448       break;
1449 
1450     case RXO_movbi:
1451       PD (GS ());
1452       cycles (1);
1453       break;
1454 
1455     case RXO_movbir:
1456       PS (GD ());
1457       cycles (1);
1458       break;
1459 
1460     case RXO_mul:
1461       v = US2 ();
1462       ll = (unsigned long long) US1() * (unsigned long long) v;
1463       PD(ll);
1464       E (1);
1465       break;
1466 
1467     case RXO_mulhi:
1468       v = GS2 ();
1469       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16);
1470       ll <<= 16;
1471       put_reg64 (acc64, ll);
1472       E1;
1473       break;
1474 
1475     case RXO_mullo:
1476       v = GS2 ();
1477       ll = (long long)(signed short)(GS()) * (long long)(signed short)(v);
1478       ll <<= 16;
1479       put_reg64 (acc64, ll);
1480       E1;
1481       break;
1482 
1483     case RXO_mvfachi:
1484       PD (get_reg (acchi));
1485       E1;
1486       break;
1487 
1488     case RXO_mvfaclo:
1489       PD (get_reg (acclo));
1490       E1;
1491       break;
1492 
1493     case RXO_mvfacmi:
1494       PD (get_reg (accmi));
1495       E1;
1496       break;
1497 
1498     case RXO_mvtachi:
1499       put_reg (acchi, GS ());
1500       E1;
1501       break;
1502 
1503     case RXO_mvtaclo:
1504       put_reg (acclo, GS ());
1505       E1;
1506       break;
1507 
1508     case RXO_mvtipl:
1509       regs.r_psw &= ~ FLAGBITS_IPL;
1510       regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;
1511       E1;
1512       break;
1513 
1514     case RXO_nop:
1515     case RXO_nop2:
1516     case RXO_nop3:
1517     case RXO_nop4:
1518     case RXO_nop5:
1519     case RXO_nop6:
1520     case RXO_nop7:
1521       E1;
1522       break;
1523 
1524     case RXO_or:
1525       LOGIC_OP (|);
1526       break;
1527 
1528     case RXO_popm:
1529       /* POPM cannot pop R0 (sp).  */
1530       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1531 	EXCEPTION (EX_UNDEFINED);
1532       if (opcode->op[1].reg >= opcode->op[2].reg)
1533 	{
1534 	  regs.r_pc = opcode_pc;
1535 	  DO_RETURN (RX_MAKE_STOPPED (SIGILL));
1536 	}
1537       for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++)
1538 	{
1539 	  cycles (1);
1540 	  RLD (v);
1541 	  put_reg (v, pop ());
1542 	}
1543       break;
1544 
1545     case RXO_pushm:
1546       /* PUSHM cannot push R0 (sp).  */
1547       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1548 	EXCEPTION (EX_UNDEFINED);
1549       if (opcode->op[1].reg >= opcode->op[2].reg)
1550 	{
1551 	  regs.r_pc = opcode_pc;
1552 	  return RX_MAKE_STOPPED (SIGILL);
1553 	}
1554       for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--)
1555 	{
1556 	  RL (v);
1557 	  push (get_reg (v));
1558 	}
1559       cycles (opcode->op[2].reg - opcode->op[1].reg + 1);
1560       break;
1561 
1562     case RXO_racw:
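      /* Round and saturate the accumulator: shift left, add half an
	 LSB of the upper 32-bit word, clamp that word to the signed
	 16-bit range, then clear the low word.  */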
1563       ll = get_reg64 (acc64) << GS ();
1564       ll += 0x80000000ULL;
1565       if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
1566 	ll = 0x00007fff00000000ULL;
1567       else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
1568 	ll = 0xffff800000000000ULL;
1569       else
1570 	ll &= 0xffffffff00000000ULL;
1571       put_reg64 (acc64, ll);
1572       E1;
1573       break;
1574 
1575     case RXO_rte:
1576       PRIVILEDGED ();
1577       regs.r_pc = poppc ();
1578       regs.r_psw = poppc ();
1579       if (FLAG_PM)
1580 	regs.r_psw |= FLAGBIT_U;
1581 #ifdef CYCLE_ACCURATE
1582       regs.fast_return = 0;
1583       cycles (6);
1584 #endif
1585       break;
1586 
1587     case RXO_revl:
1588       uma = GS ();
1589       umb = (((uma >> 24) & 0xff)
1590 	     | ((uma >> 8) & 0xff00)
1591 	     | ((uma << 8) & 0xff0000)
1592 	     | ((uma << 24) & 0xff000000UL));
1593       PD (umb);
1594       E1;
1595       break;
1596 
1597     case RXO_revw:
1598       uma = GS ();
1599       umb = (((uma >> 8) & 0x00ff00ff)
1600 	     | ((uma << 8) & 0xff00ff00UL));
1601       PD (umb);
1602       E1;
1603       break;
1604 
1605     case RXO_rmpa:
1606       RL(4);
1607       RL(5);
1608 #ifdef CYCLE_ACCURATE
1609       tx = regs.r[3];
1610 #endif
1611 
1612       while (regs.r[3] != 0)
1613 	{
1614 	  long long tmp;
1615 
1616 	  switch (opcode->size)
1617 	    {
1618 	    case RX_Long:
1619 	      ma = mem_get_si (regs.r[1]);
1620 	      mb = mem_get_si (regs.r[2]);
1621 	      regs.r[1] += 4;
1622 	      regs.r[2] += 4;
1623 	      break;
1624 	    case RX_Word:
1625 	      ma = sign_ext (mem_get_hi (regs.r[1]), 16);
1626 	      mb = sign_ext (mem_get_hi (regs.r[2]), 16);
1627 	      regs.r[1] += 2;
1628 	      regs.r[2] += 2;
1629 	      break;
1630 	    case RX_Byte:
1631 	      ma = sign_ext (mem_get_qi (regs.r[1]), 8);
1632 	      mb = sign_ext (mem_get_qi (regs.r[2]), 8);
1633 	      regs.r[1] += 1;
1634 	      regs.r[2] += 1;
1635 	      break;
1636 	    default:
1637 	      abort ();
1638 	    }
1639 	  /* We do the multiply as a signed value.  */
1640 	  sll = (long long)ma * (long long)mb;
1641 	  tprintf("        %016llx = %d * %d\n", sll, ma, mb);
1642 	  /* but we do the sum as unsigned, while sign extending the operands.  */
1643 	  tmp = regs.r[4] + (sll & 0xffffffffUL);
1644 	  regs.r[4] = tmp & 0xffffffffUL;
1645 	  tmp >>= 32;
1646 	  sll >>= 32;
1647 	  tmp += regs.r[5] + (sll & 0xffffffffUL);
1648 	  regs.r[5] = tmp & 0xffffffffUL;
1649 	  tmp >>= 32;
1650 	  sll >>= 32;
1651 	  tmp += regs.r[6] + (sll & 0xffffffffUL);
1652 	  regs.r[6] = tmp & 0xffffffffUL;
1653 	  tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1654 		  (unsigned long) regs.r[6],
1655 		  (unsigned long) regs.r[5],
1656 		  (unsigned long) regs.r[4]);
1657 
1658 	  regs.r[3] --;
1659 	}
1660       if (regs.r[6] & 0x00008000)
1661 	regs.r[6] |= 0xffff0000UL;
1662       else
1663 	regs.r[6] &= 0x0000ffff;
1664       ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
1665       if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
1666 	set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
1667       else
1668 	set_flags (FLAGBIT_O|FLAGBIT_S, ma);
1669 #ifdef CYCLE_ACCURATE
1670       switch (opcode->size)
1671 	{
1672 	case RX_Long:
1673 	  cycles (6 + 4 * tx);
1674 	  break;
1675 	case RX_Word:
1676 	  cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
1677 	  break;
1678 	case RX_Byte:
1679 	  cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
1680 	  break;
1681 	default:
1682 	  abort ();
1683 	}
1684 #endif
1685       break;
1686 
1687     case RXO_rolc:
1688       v = GD ();
1689       ma = v & 0x80000000UL;
1690       v <<= 1;
1691       v |= carry;
1692       set_szc (v, 4, ma);
1693       PD (v);
1694       E1;
1695       break;
1696 
1697     case RXO_rorc:
1698       uma = GD ();
1699       mb = uma & 1;
1700       uma >>= 1;
1701       uma |= (carry ? 0x80000000UL : 0);
1702       set_szc (uma, 4, mb);
1703       PD (uma);
1704       E1;
1705       break;
1706 
1707     case RXO_rotl:
1708       mb = GS ();
1709       uma = GD ();
1710       if (mb)
1711 	{
1712 	  uma = (uma << mb) | (uma >> (32-mb));
1713 	  mb = uma & 1;
1714 	}
1715       set_szc (uma, 4, mb);
1716       PD (uma);
1717       E1;
1718       break;
1719 
1720     case RXO_rotr:
1721       mb = GS ();
1722       uma = GD ();
1723       if (mb)
1724 	{
1725 	  uma = (uma >> mb) | (uma << (32-mb));
1726 	  mb = uma & 0x80000000;
1727 	}
1728       set_szc (uma, 4, mb);
1729       PD (uma);
1730       E1;
1731       break;
1732 
1733     case RXO_round:
1734       ma = GS ();
1735       FPCLEAR ();
1736       mb = rxfp_ftoi (ma, regs.r_fpsw);
1737       FPCHECK ();
1738       PD (mb);
1739       tprintf("(int) %g = %d\n", int2float(ma), mb);
1740       set_sz (mb, 4);
1741       E (2);
1742       break;
1743 
1744     case RXO_rts:
1745       {
1746 #ifdef CYCLE_ACCURATE
1747 	int cyc = 5;
1748 #endif
1749 	regs.r_pc = poppc ();
1750 #ifdef CYCLE_ACCURATE
1751 	/* Note: specs say 5, chip says 3.  */
1752 	if (regs.fast_return && regs.link_register == regs.r_pc)
1753 	  {
1754 #ifdef CYCLE_STATS
1755 	    fast_returns ++;
1756 #endif
1757 	    tprintf("fast return bonus\n");
1758 	    cyc -= 2;
1759 	  }
1760 	cycles (cyc);
1761 	regs.fast_return = 0;
1762 	branch_alignment_penalty = 1;
1763 #endif
1764       }
1765       break;
1766 
1767     case RXO_rtsd:
1768       if (opcode->op[2].type == RX_Operand_Register)
1769 	{
1770 	  int i;
1771 	  /* RTSD cannot pop R0 (sp).  */
1772 	  put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
1773 	  if (opcode->op[2].reg == 0)
1774 	    EXCEPTION (EX_UNDEFINED);
1775 #ifdef CYCLE_ACCURATE
1776 	  tx = opcode->op[0].reg - opcode->op[2].reg + 1;
1777 #endif
1778 	  for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
1779 	    {
1780 	      RLD (i);
1781 	      put_reg (i, pop ());
1782 	    }
1783 	}
1784       else
1785 	{
1786 #ifdef CYCLE_ACCURATE
1787 	  tx = 0;
1788 #endif
1789 	  put_reg (0, get_reg (0) + GS());
1790 	}
1791       put_reg (pc, poppc());
1792 #ifdef CYCLE_ACCURATE
1793       if (regs.fast_return && regs.link_register == regs.r_pc)
1794 	{
1795 	  tprintf("fast return bonus\n");
1796 #ifdef CYCLE_STATS
1797 	  fast_returns ++;
1798 #endif
1799 	  cycles (tx < 3 ? 3 : tx + 1);
1800 	}
1801       else
1802 	{
1803 	  cycles (tx < 5 ? 5 : tx + 1);
1804 	}
1805       regs.fast_return = 0;
1806       branch_alignment_penalty = 1;
1807 #endif
1808       break;
1809 
1810     case RXO_sat:
1811       if (FLAG_O && FLAG_S)
1812 	PD (0x7fffffffUL);
1813       else if (FLAG_O && ! FLAG_S)
1814 	PD (0x80000000UL);
1815       E1;
1816       break;
1817 
1818     case RXO_satr:
1819       if (FLAG_O && ! FLAG_S)
1820 	{
1821 	  put_reg (6, 0x0);
1822 	  put_reg (5, 0x7fffffff);
1823 	  put_reg (4, 0xffffffff);
1824 	}
1825       else if (FLAG_O && FLAG_S)
1826 	{
1827 	  put_reg (6, 0xffffffff);
1828 	  put_reg (5, 0x80000000);
1829 	  put_reg (4, 0x0);
1830 	}
1831       E1;
1832       break;
1833 
1834     case RXO_sbb:
1835       MATH_OP (-, ! carry);
1836       break;
1837 
1838     case RXO_sccnd:
1839       if (GS())
1840 	PD (1);
1841       else
1842 	PD (0);
1843       E1;
1844       break;
1845 
1846     case RXO_scmpu:
1847 #ifdef CYCLE_ACCURATE
1848       tx = regs.r[3];
1849 #endif
1850       while (regs.r[3] != 0)
1851 	{
1852 	  uma = mem_get_qi (regs.r[1] ++);
1853 	  umb = mem_get_qi (regs.r[2] ++);
1854 	  regs.r[3] --;
1855 	  if (uma != umb || uma == 0)
1856 	    break;
1857 	}
1858       if (uma == umb)
1859 	set_zc (1, 1);
1860       else
1861 	set_zc (0, ((int)uma - (int)umb) >= 0);
1862       cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));
1863       break;
1864 
1865     case RXO_setpsw:
1866       v = 1 << opcode->op[0].reg;
1867       if (FLAG_PM
1868 	  && (v == FLAGBIT_I
1869 	      || v == FLAGBIT_U))
1870 	break;
1871       regs.r_psw |= v;
1872       cycles (1);
1873       break;
1874 
1875     case RXO_smovb:
1876       RL (3);
1877 #ifdef CYCLE_ACCURATE
1878       tx = regs.r[3];
1879 #endif
1880       while (regs.r[3])
1881 	{
1882 	  uma = mem_get_qi (regs.r[2] --);
1883 	  mem_put_qi (regs.r[1]--, uma);
1884 	  regs.r[3] --;
1885 	}
1886 #ifdef CYCLE_ACCURATE
1887       if (tx > 3)
1888 	cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
1889       else
1890 	cycles (2 + 3 * (tx % 4));
1891 #endif
1892       break;
1893 
1894     case RXO_smovf:
1895       RL (3);
1896 #ifdef CYCLE_ACCURATE
1897       tx = regs.r[3];
1898 #endif
1899       while (regs.r[3])
1900 	{
1901 	  uma = mem_get_qi (regs.r[2] ++);
1902 	  mem_put_qi (regs.r[1]++, uma);
1903 	  regs.r[3] --;
1904 	}
1905       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1906       break;
1907 
1908     case RXO_smovu:
1909 #ifdef CYCLE_ACCURATE
1910       tx = regs.r[3];
1911 #endif
1912       while (regs.r[3] != 0)
1913 	{
1914 	  uma = mem_get_qi (regs.r[2] ++);
1915 	  mem_put_qi (regs.r[1]++, uma);
1916 	  regs.r[3] --;
1917 	  if (uma == 0)
1918 	    break;
1919 	}
1920       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1921       break;
1922 
1923     case RXO_shar: /* d = ma >> mb */
1924       SHIFT_OP (sll, int, mb, >>=, 1);
1925       E (1);
1926       break;
1927 
1928     case RXO_shll: /* d = ma << mb */
1929       SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);
1930       E (1);
1931       break;
1932 
1933     case RXO_shlr: /* d = ma >> mb */
1934       SHIFT_OP (ll, unsigned int, mb, >>=, 1);
1935       E (1);
1936       break;
1937 
1938     case RXO_sstr:
1939       RL (3);
1940 #ifdef CYCLE_ACCURATE
1941       tx = regs.r[3];
1942 #endif
1943       switch (opcode->size)
1944 	{
1945 	case RX_Long:
1946 	  while (regs.r[3] != 0)
1947 	    {
1948 	      mem_put_si (regs.r[1], regs.r[2]);
1949 	      regs.r[1] += 4;
1950 	      regs.r[3] --;
1951 	    }
1952 	  cycles (2 + tx);
1953 	  break;
1954 	case RX_Word:
1955 	  while (regs.r[3] != 0)
1956 	    {
1957 	      mem_put_hi (regs.r[1], regs.r[2]);
1958 	      regs.r[1] += 2;
1959 	      regs.r[3] --;
1960 	    }
1961 	  cycles (2 + (int)(tx / 2) + tx % 2);
1962 	  break;
1963 	case RX_Byte:
1964 	  while (regs.r[3] != 0)
1965 	    {
1966 	      mem_put_qi (regs.r[1], regs.r[2]);
1967 	      regs.r[1] ++;
1968 	      regs.r[3] --;
1969 	    }
1970 	  cycles (2 + (int)(tx / 4) + tx % 4);
1971 	  break;
1972 	default:
1973 	  abort ();
1974 	}
1975       break;
1976 
1977     case RXO_stcc:
1978       if (GS2())
1979 	PD (GS ());
1980       E1;
1981       break;
1982 
1983     case RXO_stop:
1984       PRIVILEDGED ();
1985       regs.r_psw |= FLAGBIT_I;
1986       DO_RETURN (RX_MAKE_STOPPED(0));
1987 
1988     case RXO_sub:
1989       MATH_OP (-, 0);
1990       break;
1991 
1992     case RXO_suntil:
1993       RL(3);
1994 #ifdef CYCLE_ACCURATE
1995       tx = 0;
1996 #endif
1997       if (regs.r[3] == 0)
1998 	{
1999 	  cycles (3);
2000 	  break;
2001 	}
2002       switch (opcode->size)
2003 	{
2004 	case RX_Long:
2005 	  uma = get_reg (2);
2006 	  while (regs.r[3] != 0)
2007 	    {
2008 	      regs.r[3] --;
2009 	      umb = mem_get_si (get_reg (1));
2010 	      regs.r[1] += 4;
2011 #ifdef CYCLE_ACCURATE
2012 	      tx ++;
2013 #endif
2014 	      if (umb == uma)
2015 		break;
2016 	    }
2017 #ifdef CYCLE_ACCURATE
2018 	  cycles (3 + 3 * tx);
2019 #endif
2020 	  break;
2021 	case RX_Word:
2022 	  uma = get_reg (2) & 0xffff;
2023 	  while (regs.r[3] != 0)
2024 	    {
2025 	      regs.r[3] --;
2026 	      umb = mem_get_hi (get_reg (1));
2027 	      regs.r[1] += 2;
2028 #ifdef CYCLE_ACCURATE
2029 	      tx ++;
2030 #endif
2031 	      if (umb == uma)
2032 		break;
2033 	    }
2034 #ifdef CYCLE_ACCURATE
2035 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2036 #endif
2037 	  break;
2038 	case RX_Byte:
2039 	  uma = get_reg (2) & 0xff;
2040 	  while (regs.r[3] != 0)
2041 	    {
2042 	      regs.r[3] --;
2043 	      umb = mem_get_qi (regs.r[1]);
2044 	      regs.r[1] += 1;
2045 #ifdef CYCLE_ACCURATE
2046 	      tx ++;
2047 #endif
2048 	      if (umb == uma)
2049 		break;
2050 	    }
2051 #ifdef CYCLE_ACCURATE
2052 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2053 #endif
2054 	  break;
2055 	default:
2056 	  abort();
2057 	}
2058       if (uma == umb)
2059 	set_zc (1, 1);
2060       else
2061 	set_zc (0, ((int)uma - (int)umb) >= 0);
2062       break;
2063 
2064     case RXO_swhile:
2065       RL(3);
2066 #ifdef CYCLE_ACCURATE
2067       tx = 0;
2068 #endif
2069       if (regs.r[3] == 0)
2070 	break;
2071       switch (opcode->size)
2072 	{
2073 	case RX_Long:
2074 	  uma = get_reg (2);
2075 	  while (regs.r[3] != 0)
2076 	    {
2077 	      regs.r[3] --;
2078 	      umb = mem_get_si (get_reg (1));
2079 	      regs.r[1] += 4;
2080 #ifdef CYCLE_ACCURATE
2081 	      tx ++;
2082 #endif
2083 	      if (umb != uma)
2084 		break;
2085 	    }
2086 #ifdef CYCLE_ACCURATE
2087 	  cycles (3 + 3 * tx);
2088 #endif
2089 	  break;
2090 	case RX_Word:
2091 	  uma = get_reg (2) & 0xffff;
2092 	  while (regs.r[3] != 0)
2093 	    {
2094 	      regs.r[3] --;
2095 	      umb = mem_get_hi (get_reg (1));
2096 	      regs.r[1] += 2;
2097 #ifdef CYCLE_ACCURATE
2098 	      tx ++;
2099 #endif
2100 	      if (umb != uma)
2101 		break;
2102 	    }
2103 #ifdef CYCLE_ACCURATE
2104 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2105 #endif
2106 	  break;
2107 	case RX_Byte:
2108 	  uma = get_reg (2) & 0xff;
2109 	  while (regs.r[3] != 0)
2110 	    {
2111 	      regs.r[3] --;
2112 	      umb = mem_get_qi (regs.r[1]);
2113 	      regs.r[1] += 1;
2114 #ifdef CYCLE_ACCURATE
2115 	      tx ++;
2116 #endif
2117 	      if (umb != uma)
2118 		break;
2119 	    }
2120 #ifdef CYCLE_ACCURATE
2121 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2122 #endif
2123 	  break;
2124 	default:
2125 	  abort();
2126 	}
2127       if (uma == umb)
2128 	set_zc (1, 1);
2129       else
2130 	set_zc (0, ((int)uma - (int)umb) >= 0);
2131       break;
2132 
2133     case RXO_wait:
2134       PRIVILEDGED ();
2135       regs.r_psw |= FLAGBIT_I;
2136       DO_RETURN (RX_MAKE_STOPPED(0));
2137 
2138     case RXO_xchg:
2139 #ifdef CYCLE_ACCURATE
2140       regs.m2m = 0;
2141 #endif
2142       v = GS (); /* This is the memory operand, if any.  */
2143       PS (GD ()); /* and this may change the address register.  */
2144       PD (v);
2145       E2;
2146 #ifdef CYCLE_ACCURATE
2147       /* all M cycles happen during xchg's cycles.  */
2148       memory_dest = 0;
2149       memory_source = 0;
2150 #endif
2151       break;
2152 
2153     case RXO_xor:
2154       LOGIC_OP (^);
2155       break;
2156 
2157     default:
2158       EXCEPTION (EX_UNDEFINED);
2159     }
2160 
2161 #ifdef CYCLE_ACCURATE
2162   regs.m2m = 0;
2163   if (memory_source)
2164     regs.m2m |= M2M_SRC;
2165   if (memory_dest)
2166     regs.m2m |= M2M_DST;
2167 
2168   regs.rt = new_rt;
2169   new_rt = -1;
2170 #endif
2171 
2172 #ifdef CYCLE_STATS
2173   if (prev_cycle_count == regs.cycle_count)
2174     {
2175       printf("Cycle count not updated! id %s\n", id_names[opcode->id]);
2176       abort ();
2177     }
2178 #endif
2179 
2180 #ifdef CYCLE_STATS
2181   if (running_benchmark)
2182     {
2183       int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);
2184 
2185 
2186       cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
2187       times_per_id[opcode->id][omap] ++;
2188 
2189       times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
2190 
2191       prev_opcode_id = opcode->id;
2192       po0 = omap;
2193     }
2194 #endif
2195 
2196   return RX_MAKE_STEPPED ();
2197 }
2198 
2199 #ifdef CYCLE_STATS
2200 void
2201 reset_pipeline_stats (void)
2202 {
2203   memset (cycles_per_id, 0, sizeof(cycles_per_id));
2204   memset (times_per_id, 0, sizeof(times_per_id));
2205   memory_stalls = 0;
2206   register_stalls = 0;
2207   branch_stalls = 0;
2208   branch_alignment_stalls = 0;
2209   fast_returns = 0;
2210   memset (times_per_pair, 0, sizeof(times_per_pair));
2211   running_benchmark = 1;
2212 
2213   benchmark_start_cycle = regs.cycle_count;
2214 }
2215 
2216 void
2217 halt_pipeline_stats (void)
2218 {
2219   running_benchmark = 0;
2220   benchmark_end_cycle = regs.cycle_count;
2221 }
2222 #endif
2223 
2224 void
2225 pipeline_stats (void)
2226 {
2227 #ifdef CYCLE_STATS
2228   int i, o1;
2229   int p, p1;
2230 #endif
2231 
2232 #ifdef CYCLE_ACCURATE
2233   if (verbose == 1)
2234     {
2235       printf ("cycles: %llu\n", regs.cycle_count);
2236       return;
2237     }
2238 
2239   printf ("cycles: %13s\n", comma (regs.cycle_count));
2240 #endif
2241 
2242 #ifdef CYCLE_STATS
2243   if (benchmark_start_cycle)
2244     printf ("bmark:  %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));
2245 
2246   printf("\n");
2247   for (i = 0; i < N_RXO; i++)
2248     for (o1 = 0; o1 < N_MAP; o1 ++)
2249       if (times_per_id[i][o1])
2250 	printf("%13s %13s %7.2f  %s %s\n",
2251 	       comma (cycles_per_id[i][o1]),
2252 	       comma (times_per_id[i][o1]),
2253 	       (double)cycles_per_id[i][o1] / times_per_id[i][o1],
2254 	       op_cache_string(o1),
2255 	       id_names[i]+4);
2256 
2257   printf("\n");
2258   for (p = 0; p < N_RXO; p ++)
2259     for (p1 = 0; p1 < N_MAP; p1 ++)
2260       for (i = 0; i < N_RXO; i ++)
2261 	for (o1 = 0; o1 < N_MAP; o1 ++)
2262 	  if (times_per_pair[p][p1][i][o1])
2263 	    {
2264 	      printf("%13s   %s %-9s  ->  %s %s\n",
2265 		     comma (times_per_pair[p][p1][i][o1]),
2266 		     op_cache_string(p1),
2267 		     id_names[p]+4,
2268 		     op_cache_string(o1),
2269 		     id_names[i]+4);
2270 	    }
2271 
2272   printf("\n");
2273   printf("%13s memory stalls\n", comma (memory_stalls));
2274   printf("%13s register stalls\n", comma (register_stalls));
2275   printf("%13s branches taken (non-return)\n", comma (branch_stalls));
2276   printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
2277   printf("%13s fast returns\n", comma (fast_returns));
2278 #endif
2279 }
2280