xref: /netbsd-src/external/gpl3/gdb.old/dist/sim/arm/iwmmxt.c (revision 8b657b0747480f8989760d71343d6dd33f8d4cf9)
1 /*  iwmmxt.c -- Intel(r) Wireless MMX(tm) technology co-processor interface.
2     Copyright (C) 2002-2023 Free Software Foundation, Inc.
3     Contributed by matthew green (mrg@redhat.com).
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 3 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 /* This must come before any other includes.  */
19 #include "defs.h"
20 
21 #include <stdlib.h>
22 #include <string.h>
23 
24 #include "armdefs.h"
25 #include "armos.h"
26 #include "armemu.h"
27 #include "ansidecl.h"
28 #include "iwmmxt.h"
29 
30 /* #define DEBUG 1 */
31 
32 /* Intel(r) Wireless MMX(tm) technology co-processor.
33    It uses co-processor numbers (0 and 1).  There are 16 vector registers wRx
34    and 16 control registers wCx.  Co-processors 0 and 1 are used in MCR/MRC
35    to access wRx and wCx respectively.  */
36 
/* Data registers wR0 - wR15, indexed by register number.  */
static ARMdword wR[16];
/* Control registers wC0 - wC15.  Only element 0 (wCID) is given an
   explicit initial value; the rest start out as zero.  */
static ARMword  wC[16] = { 0x69051010 };
39 
/* Extract bits LO..HI (inclusive, bit 0 is the least significant) of
   WORD, treating it as a value of TYPE.  The field is first shifted
   up so that bit HI becomes the top bit of TYPE, then shifted back
   down so that bit LO becomes bit 0.  WORD is not parenthesised in
   the expansion, so pass a simple (primary) expression.  */
#define SUBSTR(word,type,lo,hi) \
  ((type)(word <<  ((sizeof (type) * 8 - 1) - (hi))) \
          >> (((sizeof (type) * 8 - 1) - (hi)) + (lo)))

/* Bit-field access to control register wC[reg] and data register
   wR[reg] respectively.  */
#define wCBITS(reg,lo,hi)   SUBSTR (wC[reg], ARMword, lo, hi)
#define wRBITS(reg,lo,hi)   SUBSTR (wR[reg], ARMdword, lo, hi)

/* Indices of the named control registers within wC[].  */
#define wCID   0
#define wCon   1
#define wCSSF  2
#define wCASF  3
#define wCGR0  8
#define wCGR1  9
#define wCGR2 10
#define wCGR3 11
52 
/* Bits in the wCon register.  */
#define WCON_CUP	(1 << 0)
#define WCON_MUP	(1 << 1)

/* OR one SIMD status flag into FLAGS for 8, 16, 32 or 64-bit lanes.
   COND is reduced to 0 or 1 (it is not parenthesised in the
   expansion); LANE selects the byte / half-word / word the flag
   belongs to and BIT is one of the (negative) SIMD_xBIT offsets
   defined below.  */
#define SIMD8_SET(flags,  cond, bit, lane) \
  (flags) |= ((cond != 0) << ((((lane) + 1) * 4) + (bit)))
#define SIMD16_SET(flags, cond, bit, lane) \
  (flags) |= ((cond != 0) << ((((lane) + 1) * 8) + (bit)))
#define SIMD32_SET(flags, cond, bit, lane) \
  (flags) |= ((cond != 0) << ((((lane) + 1) * 16) + (bit)))
#define SIMD64_SET(flags, cond, bit) \
  (flags) |= ((cond != 0) << (32 + (bit)))

/* Values to pass as BIT above: offsets counted down from the top of
   the lane's flag group.  */
#define SIMD_NBIT	-1
#define SIMD_ZBIT	-2
#define SIMD_CBIT	-3
#define SIMD_VBIT	-4
68 
/* Status bit helpers.  NBITn yields the (unshifted) sign bit of an
   n-bit quantity; ZBITn yields 1 when the low n bits are all zero.
   Every argument is parenthesised so that expressions such as
   ZBIT64 (a & b) are evaluated as a whole.  */
#define NBIT8(x)	((x) & 0x80)
#define NBIT16(x)	((x) & 0x8000)
#define NBIT32(x)	((x) & 0x80000000)
#define NBIT64(x)	((x) & 0x8000000000000000ULL)
#define ZBIT8(x)	(((x) & 0xff) == 0)
#define ZBIT16(x)	(((x) & 0xffff) == 0)
#define ZBIT32(x)	(((x) & 0xffffffff) == 0)
#define ZBIT64(x)	((x) == 0)

/* Access byte/half/word "n" of register "x".  */
#define wRBYTE(x,n)	wRBITS ((x), (n) * 8, (n) * 8 + 7)
#define wRHALF(x,n)	wRBITS ((x), (n) * 16, (n) * 16 + 15)
#define wRWORD(x,n)	wRBITS ((x), (n) * 32, (n) * 32 + 31)
83 
/* Macro to handle how the G bit selects wCGR registers.

   Reads the register number from instruction bits 0-3.  When bit 8
   (the G bit) is set the number must name one of wCGR0 - wCGR3 and
   SHIFT is loaded from that control register; any other number raises
   an undefined-instruction exception and makes the *enclosing
   function* return ARMul_DONE.  When the G bit is clear SHIFT is
   loaded from data register wR[reg].  In both cases only the low
   eight bits of the value are kept.

   BITS and BIT operate on the variable named `instr' in the caller's
   scope.  The expansion is a bare brace block (not do/while), and it
   declares a local named `reg'.  */
#define DECODE_G_BIT(state, instr, shift)	\
{						\
  unsigned int reg;				\
						\
  reg = BITS (0, 3);				\
						\
  if (BIT (8))	/* G */				\
    {						\
      if (reg < wCGR0 || reg > wCGR3)		\
	{					\
	  ARMul_UndefInstr (state, instr);	\
	  return ARMul_DONE;			\
	}					\
      shift = wC [reg];				\
    }						\
  else						\
    shift = wR [reg];				\
						\
  shift &= 0xff;				\
}
105 
/* Map lane number LANE of a byte / half-word / word operation onto an
   index into the eight-entry satrv[] array: the index of the last
   byte covered by that lane.  */
#define BITIDX8(lane)	(lane)
#define BITIDX16(lane)	(((lane) + 1) * 2 - 1)
#define BITIDX32(lane)	(((lane) + 1) * 4 - 1)

/* Sign extension: when the sign bit of the 8, 16 or 32-bit quantity
   VAL is set, OR in all the bits above it; otherwise VAL is returned
   unchanged (higher bits are NOT cleared).  */
#define EXTEND8(val)	((val) & 0x80 ? ((val) | 0xffffff00) : (val))
#define EXTEND16(val)	((val) & 0x8000 ? ((val) | 0xffff0000) : (val))
#define EXTEND32(val)	((val) & 0x80000000ULL ? ((val) | 0xffffffff00000000ULL) : (val))
115 
/* Load wCSSF from eight flags: bit i of the register becomes 1 when
   the i-th argument (B0 is bit 0, B7 is bit 7) is non-zero.  The
   expansion already ends in a semicolon.  */
#define SET_wCSSF(b0,b1,b2,b3,b4,b5,b6,b7) \
  wC[wCSSF] = (((b7) != 0) << 7) | (((b6) != 0) << 6) \
            | (((b5) != 0) << 5) | (((b4) != 0) << 4) \
            | (((b3) != 0) << 3) | (((b2) != 0) << 2) \
            | (((b1) != 0) << 1) | (((b0) != 0) << 0);

/* Load wCSSF from the first eight elements of the array VEC.  */
#define SET_wCSSFvec(vec) \
  SET_wCSSF((vec)[0],(vec)[1],(vec)[2],(vec)[3],(vec)[4],(vec)[5],(vec)[6],(vec)[7])
126 
/* Element-size field values for vector operations:
   byte, half-word, word and double-word.  */
#define Bqual	0
#define Hqual	1
#define Wqual	2
#define Dqual	3

/* Saturation field values for vector operations.  */
#define NoSaturation		0
#define UnsignedSaturation	1
#define SignedSaturation	3
137 
138 
139 /* Prototypes.  */
140 static ARMword         Add32  (ARMword,  ARMword,  int *, int *, ARMword);
141 static ARMdword        AddS32 (ARMdword, ARMdword, int *, int *);
142 static ARMdword        AddU32 (ARMdword, ARMdword, int *, int *);
143 static ARMword         AddS16 (ARMword,  ARMword,  int *, int *);
144 static ARMword         AddU16 (ARMword,  ARMword,  int *, int *);
145 static ARMword         AddS8  (ARMword,  ARMword,  int *, int *);
146 static ARMword         AddU8  (ARMword,  ARMword,  int *, int *);
147 static ARMword         Sub32  (ARMword,  ARMword,  int *, int *, ARMword);
148 static ARMdword        SubS32 (ARMdword, ARMdword, int *, int *);
149 static ARMdword        SubU32 (ARMdword, ARMdword, int *, int *);
150 static ARMword         SubS16 (ARMword,  ARMword,  int *, int *);
151 static ARMword         SubS8  (ARMword,  ARMword,  int *, int *);
152 static ARMword         SubU16 (ARMword,  ARMword,  int *, int *);
153 static ARMword         SubU8  (ARMword,  ARMword,  int *, int *);
154 static unsigned char   IwmmxtSaturateU8  (signed short, int *);
155 static signed char     IwmmxtSaturateS8  (signed short, int *);
156 static unsigned short  IwmmxtSaturateU16 (signed int, int *);
157 static signed short    IwmmxtSaturateS16 (signed int, int *);
158 static unsigned long   IwmmxtSaturateU32 (signed long long, int *);
159 static signed long     IwmmxtSaturateS32 (signed long long, int *);
160 static ARMword         Compute_Iwmmxt_Address   (ARMul_State *, ARMword, int *);
161 static ARMdword        Iwmmxt_Load_Double_Word  (ARMul_State *, ARMword);
162 static ARMword         Iwmmxt_Load_Word         (ARMul_State *, ARMword);
163 static ARMword         Iwmmxt_Load_Half_Word    (ARMul_State *, ARMword);
164 static ARMword         Iwmmxt_Load_Byte         (ARMul_State *, ARMword);
165 static void            Iwmmxt_Store_Double_Word (ARMul_State *, ARMword, ARMdword);
166 static void            Iwmmxt_Store_Word        (ARMul_State *, ARMword, ARMword);
167 static void            Iwmmxt_Store_Half_Word   (ARMul_State *, ARMword, ARMword);
168 static void            Iwmmxt_Store_Byte        (ARMul_State *, ARMword, ARMword);
169 static int             Process_Instruction      (ARMul_State *, ARMword);
170 
171 static int TANDC    (ARMul_State *, ARMword);
172 static int TBCST    (ARMul_State *, ARMword);
173 static int TEXTRC   (ARMul_State *, ARMword);
174 static int TEXTRM   (ARMul_State *, ARMword);
175 static int TINSR    (ARMul_State *, ARMword);
176 static int TMCR     (ARMul_State *, ARMword);
177 static int TMCRR    (ARMul_State *, ARMword);
178 static int TMIA     (ARMul_State *, ARMword);
179 static int TMIAPH   (ARMul_State *, ARMword);
180 static int TMIAxy   (ARMul_State *, ARMword);
181 static int TMOVMSK  (ARMul_State *, ARMword);
182 static int TMRC     (ARMul_State *, ARMword);
183 static int TMRRC    (ARMul_State *, ARMword);
184 static int TORC     (ARMul_State *, ARMword);
185 static int WACC     (ARMul_State *, ARMword);
186 static int WADD     (ARMul_State *, ARMword);
187 static int WALIGNI  (ARMword);
188 static int WALIGNR  (ARMul_State *, ARMword);
189 static int WAND     (ARMword);
190 static int WANDN    (ARMword);
191 static int WAVG2    (ARMword);
192 static int WCMPEQ   (ARMul_State *, ARMword);
193 static int WCMPGT   (ARMul_State *, ARMword);
194 static int WLDR     (ARMul_State *, ARMword);
195 static int WMAC     (ARMword);
196 static int WMADD    (ARMword);
197 static int WMAX     (ARMul_State *, ARMword);
198 static int WMIN     (ARMul_State *, ARMword);
199 static int WMUL     (ARMword);
200 static int WOR      (ARMword);
201 static int WPACK    (ARMul_State *, ARMword);
202 static int WROR     (ARMul_State *, ARMword);
203 static int WSAD     (ARMword);
204 static int WSHUFH   (ARMword);
205 static int WSLL     (ARMul_State *, ARMword);
206 static int WSRA     (ARMul_State *, ARMword);
207 static int WSRL     (ARMul_State *, ARMword);
208 static int WSTR     (ARMul_State *, ARMword);
209 static int WSUB     (ARMul_State *, ARMword);
210 static int WUNPCKEH (ARMul_State *, ARMword);
211 static int WUNPCKEL (ARMul_State *, ARMword);
212 static int WUNPCKIH (ARMul_State *, ARMword);
213 static int WUNPCKIL (ARMul_State *, ARMword);
214 static int WXOR     (ARMword);
215 
216 /* This function does the work of adding two 32bit values
217    together, and calculating if a carry has occurred.  */
218 
219 static ARMword
220 Add32 (ARMword a1,
221        ARMword a2,
222        int * carry_ptr,
223        int * overflow_ptr,
224        ARMword sign_mask)
225 {
226   ARMword result = (a1 + a2);
227   unsigned int uresult = (unsigned int) result;
228   unsigned int ua1 = (unsigned int) a1;
229 
230   /* If (result == a1) and (a2 == 0),
231      or (result > a2) then we have no carry.  */
232   * carry_ptr = ((uresult == ua1) ? (a2 != 0) : (uresult < ua1));
233 
234   /* Overflow occurs when both arguments are the
235      same sign, but the result is a different sign.  */
236   * overflow_ptr = (   ( (result & sign_mask) && !(a1 & sign_mask) && !(a2 & sign_mask))
237 		    || (!(result & sign_mask) &&  (a1 & sign_mask) &&  (a2 & sign_mask)));
238 
239   return result;
240 }
241 
242 static ARMdword
243 AddS32 (ARMdword a1, ARMdword a2, int * carry_ptr, int * overflow_ptr)
244 {
245   ARMdword     result;
246   unsigned int uresult;
247   unsigned int ua1;
248 
249   a1 = EXTEND32 (a1);
250   a2 = EXTEND32 (a2);
251 
252   result  = a1 + a2;
253   uresult = (unsigned int) result;
254   ua1     = (unsigned int) a1;
255 
256   * carry_ptr = ((uresult == a1) ? (a2 != 0) : (uresult < ua1));
257 
258   * overflow_ptr = (   ( (result & 0x80000000ULL) && !(a1 & 0x80000000ULL) && !(a2 & 0x80000000ULL))
259 		    || (!(result & 0x80000000ULL) &&  (a1 & 0x80000000ULL) &&  (a2 & 0x80000000ULL)));
260 
261   return result;
262 }
263 
264 static ARMdword
265 AddU32 (ARMdword a1, ARMdword a2, int * carry_ptr, int * overflow_ptr)
266 {
267   ARMdword     result;
268   unsigned int uresult;
269   unsigned int ua1;
270 
271   a1 &= 0xffffffff;
272   a2 &= 0xffffffff;
273 
274   result  = a1 + a2;
275   uresult = (unsigned int) result;
276   ua1     = (unsigned int) a1;
277 
278   * carry_ptr = ((uresult == a1) ? (a2 != 0) : (uresult < ua1));
279 
280   * overflow_ptr = (   ( (result & 0x80000000ULL) && !(a1 & 0x80000000ULL) && !(a2 & 0x80000000ULL))
281 		    || (!(result & 0x80000000ULL) &&  (a1 & 0x80000000ULL) &&  (a2 & 0x80000000ULL)));
282 
283   return result;
284 }
285 
286 static ARMword
287 AddS16 (ARMword a1, ARMword a2, int * carry_ptr, int * overflow_ptr)
288 {
289   a1 = EXTEND16 (a1);
290   a2 = EXTEND16 (a2);
291 
292   return Add32 (a1, a2, carry_ptr, overflow_ptr, 0x8000);
293 }
294 
295 static ARMword
296 AddU16 (ARMword a1, ARMword a2, int * carry_ptr, int * overflow_ptr)
297 {
298   a1 &= 0xffff;
299   a2 &= 0xffff;
300 
301   return Add32 (a1, a2, carry_ptr, overflow_ptr, 0x8000);
302 }
303 
304 static ARMword
305 AddS8 (ARMword a1, ARMword a2, int * carry_ptr, int * overflow_ptr)
306 {
307   a1 = EXTEND8 (a1);
308   a2 = EXTEND8 (a2);
309 
310   return Add32 (a1, a2, carry_ptr, overflow_ptr, 0x80);
311 }
312 
313 static ARMword
314 AddU8 (ARMword a1, ARMword a2, int * carry_ptr, int * overflow_ptr)
315 {
316   a1 &= 0xff;
317   a2 &= 0xff;
318 
319   return Add32 (a1, a2, carry_ptr, overflow_ptr, 0x80);
320 }
321 
322 static ARMword
323 Sub32 (ARMword a1,
324        ARMword a2,
325        int * borrow_ptr,
326        int * overflow_ptr,
327        ARMword sign_mask)
328 {
329   ARMword result = (a1 - a2);
330   unsigned int ua1 = (unsigned int) a1;
331   unsigned int ua2 = (unsigned int) a2;
332 
333   /* A borrow occurs if a2 is (unsigned) larger than a1.
334      However the carry flag is *cleared* if a borrow occurs.  */
335   * borrow_ptr = ! (ua2 > ua1);
336 
337   /* Overflow occurs when a negative number is subtracted from a
338      positive number and the result is negative or a positive
339      number is subtracted from a negative number and the result is
340      positive.  */
341   * overflow_ptr = ( (! (a1 & sign_mask) &&   (a2 & sign_mask) &&   (result & sign_mask))
342 		    || ((a1 & sign_mask) && ! (a2 & sign_mask) && ! (result & sign_mask)));
343 
344   return result;
345 }
346 
347 static ARMdword
348 SubS32 (ARMdword a1, ARMdword a2, int * borrow_ptr, int * overflow_ptr)
349 {
350   ARMdword     result;
351   unsigned int ua1;
352   unsigned int ua2;
353 
354   a1 = EXTEND32 (a1);
355   a2 = EXTEND32 (a2);
356 
357   result = a1 - a2;
358   ua1    = (unsigned int) a1;
359   ua2    = (unsigned int) a2;
360 
361   * borrow_ptr = ! (ua2 > ua1);
362 
363   * overflow_ptr = ( (! (a1 & 0x80000000ULL) &&   (a2 & 0x80000000ULL) &&   (result & 0x80000000ULL))
364 		    || ((a1 & 0x80000000ULL) && ! (a2 & 0x80000000ULL) && ! (result & 0x80000000ULL)));
365 
366   return result;
367 }
368 
369 static ARMword
370 SubS16 (ARMword a1, ARMword a2, int * carry_ptr, int * overflow_ptr)
371 {
372   a1 = EXTEND16 (a1);
373   a2 = EXTEND16 (a2);
374 
375   return Sub32 (a1, a2, carry_ptr, overflow_ptr, 0x8000);
376 }
377 
378 static ARMword
379 SubS8 (ARMword a1, ARMword a2, int * carry_ptr, int * overflow_ptr)
380 {
381   a1 = EXTEND8 (a1);
382   a2 = EXTEND8 (a2);
383 
384   return Sub32 (a1, a2, carry_ptr, overflow_ptr, 0x80);
385 }
386 
387 static ARMword
388 SubU16 (ARMword a1, ARMword a2, int * carry_ptr, int * overflow_ptr)
389 {
390   a1 &= 0xffff;
391   a2 &= 0xffff;
392 
393   return Sub32 (a1, a2, carry_ptr, overflow_ptr, 0x8000);
394 }
395 
396 static ARMword
397 SubU8 (ARMword a1, ARMword a2, int * carry_ptr, int * overflow_ptr)
398 {
399   a1 &= 0xff;
400   a2 &= 0xff;
401 
402   return Sub32 (a1, a2, carry_ptr, overflow_ptr, 0x80);
403 }
404 
405 static ARMdword
406 SubU32 (ARMdword a1, ARMdword a2, int * borrow_ptr, int * overflow_ptr)
407 {
408   ARMdword     result;
409   unsigned int ua1;
410   unsigned int ua2;
411 
412   a1 &= 0xffffffff;
413   a2 &= 0xffffffff;
414 
415   result = a1 - a2;
416   ua1    = (unsigned int) a1;
417   ua2    = (unsigned int) a2;
418 
419   * borrow_ptr = ! (ua2 > ua1);
420 
421   * overflow_ptr = ( (! (a1 & 0x80000000ULL) &&   (a2 & 0x80000000ULL) &&   (result & 0x80000000ULL))
422 		    || ((a1 & 0x80000000ULL) && ! (a2 & 0x80000000ULL) && ! (result & 0x80000000ULL)));
423 
424   return result;
425 }
426 
427 /* For the saturation.  */
428 
/* Clamp VAL to the unsigned 8-bit range [0, 0xff].  *SAT is set to 1
   when clamping took place and to 0 otherwise.  */
static unsigned char
IwmmxtSaturateU8 (signed short val, int * sat)
{
  if (val >= 0 && val <= 0xff)
    {
      *sat = 0;
      return (unsigned char) (val & 0xff);
    }

  *sat = 1;
  return (val < 0) ? 0 : 0xff;
}
451 
/* Clamp VAL to the signed 8-bit range [-0x80, 0x7f].  *SAT is set to
   1 when clamping took place and to 0 otherwise.  */
static signed char
IwmmxtSaturateS8 (signed short val, int * sat)
{
  if (val >= -0x80 && val <= 0x7f)
    {
      *sat = 0;
      return (signed char) (val & 0xff);
    }

  *sat = 1;
  return (val < -0x80) ? -0x80 : 0x7f;
}
474 
/* Clamp VAL to the unsigned 16-bit range [0, 0xffff].  *SAT is set
   to 1 when clamping took place and to 0 otherwise.  */
static unsigned short
IwmmxtSaturateU16 (signed int val, int * sat)
{
  if (val >= 0 && val <= 0xffff)
    {
      *sat = 0;
      return (unsigned short) (val & 0xffff);
    }

  *sat = 1;
  return (val < 0) ? 0 : 0xffff;
}
497 
/* Clamp VAL to the signed 16-bit range [-0x8000, 0x7fff].  *SAT is
   set to 1 when clamping took place and to 0 otherwise.  */
static signed short
IwmmxtSaturateS16 (signed int val, int * sat)
{
  if (val >= -0x8000 && val <= 0x7fff)
    {
      *sat = 0;
      return (signed short) (val & 0xffff);
    }

  *sat = 1;
  return (val < -0x8000) ? -0x8000 : 0x7fff;
}
520 
/* Clamp VAL to the unsigned 32-bit range [0, 0xffffffff].  *SAT is
   set to 1 when clamping took place and to 0 otherwise.  */
static unsigned long
IwmmxtSaturateU32 (signed long long val, int * sat)
{
  if (val >= 0 && val <= 0xffffffff)
    {
      *sat = 0;
      return (unsigned long) (val & 0xffffffff);
    }

  *sat = 1;
  return (val < 0) ? 0 : 0xffffffff;
}
543 
/* Clamp VAL to the signed 32-bit range [-0x80000000, 0x7fffffff].
   *SAT is set to 1 when clamping took place and to 0 otherwise.

   The result is a correctly signed value regardless of the width of
   `long'.  Writing the lower bound as -0x80000000 would negate an
   *unsigned* constant and yield +2147483648 where `long' is 64 bits,
   and masking an in-range negative value with 0xffffffff would turn
   it into a large positive number there.  */
static signed long
IwmmxtSaturateS32 (signed long long val, int * sat)
{
  if (val < -0x80000000LL)
    {
      *sat = 1;
      return -0x7fffffffL - 1;
    }

  if (val > 0x7fffffff)
    {
      *sat = 1;
      return 0x7fffffffL;
    }

  /* VAL is known to fit in 32 bits, so the conversion is exact.  */
  *sat = 0;
  return (signed long) val;
}
566 
/* Intel(r) Wireless MMX(tm) technology Accessor functions.  */
568 
569 unsigned
570 IwmmxtLDC (ARMul_State * state ATTRIBUTE_UNUSED,
571 	   unsigned      type  ATTRIBUTE_UNUSED,
572 	   ARMword       instr,
573 	   ARMword       data)
574 {
575   return ARMul_CANT;
576 }
577 
578 unsigned
579 IwmmxtSTC (ARMul_State * state ATTRIBUTE_UNUSED,
580 	   unsigned      type  ATTRIBUTE_UNUSED,
581 	   ARMword       instr,
582 	   ARMword *     data)
583 {
584   return ARMul_CANT;
585 }
586 
587 unsigned
588 IwmmxtMRC (ARMul_State * state ATTRIBUTE_UNUSED,
589 	   unsigned      type  ATTRIBUTE_UNUSED,
590 	   ARMword       instr,
591 	   ARMword *     value)
592 {
593   return ARMul_CANT;
594 }
595 
596 unsigned
597 IwmmxtMCR (ARMul_State * state ATTRIBUTE_UNUSED,
598 	   unsigned      type  ATTRIBUTE_UNUSED,
599 	   ARMword       instr,
600 	   ARMword       value)
601 {
602   return ARMul_CANT;
603 }
604 
605 unsigned
606 IwmmxtCDP (ARMul_State * state, unsigned type, ARMword instr)
607 {
608   return ARMul_CANT;
609 }
610 
611 /* Intel(r) Wireless MMX(tm) technology instruction implementations.  */
612 
613 static int
614 TANDC (ARMul_State * state, ARMword instr)
615 {
616   ARMword cpsr;
617 
618   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
619     return ARMul_CANT;
620 
621 #ifdef DEBUG
622   fprintf (stderr, "tandc\n");
623 #endif
624 
625   /* The Rd field must be r15.  */
626   if (BITS (12, 15) != 15)
627     return ARMul_CANT;
628 
629   /* The CRn field must be r3.  */
630   if (BITS (16, 19) != 3)
631     return ARMul_CANT;
632 
633   /* The CRm field must be r0.  */
634   if (BITS (0, 3) != 0)
635     return ARMul_CANT;
636 
637   cpsr = ARMul_GetCPSR (state) & 0x0fffffff;
638 
639   switch (BITS (22, 23))
640     {
641     case Bqual:
642       cpsr |= (  (wCBITS (wCASF, 28, 31) & wCBITS (wCASF, 24, 27)
643 		& wCBITS (wCASF, 20, 23) & wCBITS (wCASF, 16, 19)
644 		& wCBITS (wCASF, 12, 15) & wCBITS (wCASF,  8, 11)
645 		& wCBITS (wCASF,  4,  7) & wCBITS (wCASF,  0,  3)) << 28);
646       break;
647 
648     case Hqual:
649       cpsr |= (  (wCBITS (wCASF, 28, 31) & wCBITS (wCASF, 20, 23)
650 		& wCBITS (wCASF, 12, 15) & wCBITS (wCASF,  4, 7)) << 28);
651       break;
652 
653     case Wqual:
654       cpsr |= ((wCBITS (wCASF, 28, 31) & wCBITS (wCASF, 12, 15)) << 28);
655       break;
656 
657     default:
658       ARMul_UndefInstr (state, instr);
659       return ARMul_DONE;
660     }
661 
662   ARMul_SetCPSR (state, cpsr);
663 
664   return ARMul_DONE;
665 }
666 
667 static int
668 TBCST (ARMul_State * state, ARMword instr)
669 {
670   ARMdword Rn;
671   int wRd;
672 
673   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
674     return ARMul_CANT;
675 
676 #ifdef DEBUG
677   fprintf (stderr, "tbcst\n");
678 #endif
679 
680   Rn  = state->Reg [BITS (12, 15)];
681   if (BITS (12, 15) == 15)
682     Rn &= 0xfffffffc;
683 
684   wRd = BITS (16, 19);
685 
686   switch (BITS (6, 7))
687     {
688     case Bqual:
689       Rn &= 0xff;
690       wR [wRd] = (Rn << 56) | (Rn << 48) | (Rn << 40) | (Rn << 32)
691 	       | (Rn << 24) | (Rn << 16) | (Rn << 8) | Rn;
692       break;
693 
694     case Hqual:
695       Rn &= 0xffff;
696       wR [wRd] = (Rn << 48) | (Rn << 32) | (Rn << 16) | Rn;
697       break;
698 
699     case Wqual:
700       Rn &= 0xffffffff;
701       wR [wRd] = (Rn << 32) | Rn;
702       break;
703 
704     default:
705       ARMul_UndefInstr (state, instr);
706       break;
707     }
708 
709   wC [wCon] |= WCON_MUP;
710   return ARMul_DONE;
711 }
712 
713 static int
714 TEXTRC (ARMul_State * state, ARMword instr)
715 {
716   ARMword cpsr;
717   ARMword selector;
718 
719   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
720     return ARMul_CANT;
721 
722 #ifdef DEBUG
723   fprintf (stderr, "textrc\n");
724 #endif
725 
726   /* The Rd field must be r15.  */
727   if (BITS (12, 15) != 15)
728     return ARMul_CANT;
729 
730   /* The CRn field must be r3.  */
731   if (BITS (16, 19) != 3)
732     return ARMul_CANT;
733 
734   /* The CRm field must be 0xxx.  */
735   if (BIT (3) != 0)
736     return ARMul_CANT;
737 
738   selector = BITS (0, 2);
739   cpsr = ARMul_GetCPSR (state) & 0x0fffffff;
740 
741   switch (BITS (22, 23))
742     {
743     case Bqual: selector *= 4; break;
744     case Hqual: selector = ((selector & 3) * 8) + 4; break;
745     case Wqual: selector = ((selector & 1) * 16) + 12; break;
746 
747     default:
748       ARMul_UndefInstr (state, instr);
749       return ARMul_DONE;
750     }
751 
752   cpsr |= wCBITS (wCASF, selector, selector + 3) << 28;
753   ARMul_SetCPSR (state, cpsr);
754 
755   return ARMul_DONE;
756 }
757 
758 static int
759 TEXTRM (ARMul_State * state, ARMword instr)
760 {
761   ARMword Rd;
762   int     offset;
763   int     wRn;
764   int     sign;
765 
766   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
767     return ARMul_CANT;
768 
769 #ifdef DEBUG
770   fprintf (stderr, "textrm\n");
771 #endif
772 
773   wRn    = BITS (16, 19);
774   sign   = BIT (3);
775   offset = BITS (0, 2);
776 
777   switch (BITS (22, 23))
778     {
779     case Bqual:
780       offset *= 8;
781       Rd = wRBITS (wRn, offset, offset + 7);
782       if (sign)
783 	Rd = EXTEND8 (Rd);
784       break;
785 
786     case Hqual:
787       offset = (offset & 3) * 16;
788       Rd = wRBITS (wRn, offset, offset + 15);
789       if (sign)
790 	Rd = EXTEND16 (Rd);
791       break;
792 
793     case Wqual:
794       offset = (offset & 1) * 32;
795       Rd = wRBITS (wRn, offset, offset + 31);
796       break;
797 
798     default:
799       ARMul_UndefInstr (state, instr);
800       return ARMul_DONE;
801     }
802 
803   if (BITS (12, 15) == 15)
804     ARMul_UndefInstr (state, instr);
805   else
806     state->Reg [BITS (12, 15)] = Rd;
807 
808   return ARMul_DONE;
809 }
810 
811 static int
812 TINSR (ARMul_State * state, ARMword instr)
813 {
814   ARMdword data;
815   ARMword  offset;
816   int      wRd;
817 
818   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
819     return ARMul_CANT;
820 
821 #ifdef DEBUG
822   fprintf (stderr, "tinsr\n");
823 #endif
824 
825   wRd = BITS (16, 19);
826   data = state->Reg [BITS (12, 15)];
827   offset = BITS (0, 2);
828 
829   switch (BITS (6, 7))
830     {
831     case Bqual:
832       data &= 0xff;
833       switch (offset)
834 	{
835 	case 0: wR [wRd] = data | (wRBITS (wRd, 8, 63) << 8); break;
836 	case 1: wR [wRd] = wRBITS (wRd, 0,  7) | (data <<  8) | (wRBITS (wRd, 16, 63) << 16); break;
837 	case 2: wR [wRd] = wRBITS (wRd, 0, 15) | (data << 16) | (wRBITS (wRd, 24, 63) << 24); break;
838 	case 3: wR [wRd] = wRBITS (wRd, 0, 23) | (data << 24) | (wRBITS (wRd, 32, 63) << 32); break;
839 	case 4: wR [wRd] = wRBITS (wRd, 0, 31) | (data << 32) | (wRBITS (wRd, 40, 63) << 40); break;
840 	case 5: wR [wRd] = wRBITS (wRd, 0, 39) | (data << 40) | (wRBITS (wRd, 48, 63) << 48); break;
841 	case 6: wR [wRd] = wRBITS (wRd, 0, 47) | (data << 48) | (wRBITS (wRd, 56, 63) << 56); break;
842 	case 7: wR [wRd] = wRBITS (wRd, 0, 55) | (data << 56); break;
843 	}
844       break;
845 
846     case Hqual:
847       data &= 0xffff;
848 
849       switch (offset & 3)
850 	{
851 	case 0: wR [wRd] = data | (wRBITS (wRd, 16, 63) << 16); break;
852 	case 1: wR [wRd] = wRBITS (wRd, 0, 15) | (data << 16) | (wRBITS (wRd, 32, 63) << 32); break;
853 	case 2: wR [wRd] = wRBITS (wRd, 0, 31) | (data << 32) | (wRBITS (wRd, 48, 63) << 48); break;
854 	case 3: wR [wRd] = wRBITS (wRd, 0, 47) | (data << 48); break;
855 	}
856       break;
857 
858     case Wqual:
859       if (offset & 1)
860 	wR [wRd] = wRBITS (wRd, 0, 31) | (data << 32);
861       else
862 	wR [wRd] = (wRBITS (wRd, 32, 63) << 32) | data;
863       break;
864 
865     default:
866       ARMul_UndefInstr (state, instr);
867       break;
868     }
869 
870   wC [wCon] |= WCON_MUP;
871   return ARMul_DONE;
872 }
873 
874 static int
875 TMCR (ARMul_State * state, ARMword instr)
876 {
877   ARMword val;
878   int     wCreg;
879 
880   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
881     return ARMul_CANT;
882 
883 #ifdef DEBUG
884   fprintf (stderr, "tmcr\n");
885 #endif
886 
887   if (BITS (0, 3) != 0)
888     return ARMul_CANT;
889 
890   val = state->Reg [BITS (12, 15)];
891   if (BITS (12, 15) == 15)
892     val &= 0xfffffffc;
893 
894   wCreg = BITS (16, 19);
895 
896   switch (wCreg)
897     {
898     case wCID:
899       /* The wCID register is read only.  */
900       break;
901 
902     case wCon:
903       /* Writing to the MUP or CUP bits clears them.  */
904       wC [wCon] &= ~ (val & 0x3);
905       break;
906 
907     case wCSSF:
908       /* Only the bottom 8 bits can be written to.
909           The higher bits write as zero.  */
910       wC [wCSSF] = (val & 0xff);
911       wC [wCon] |= WCON_CUP;
912       break;
913 
914     default:
915       wC [wCreg] = val;
916       wC [wCon] |= WCON_CUP;
917       break;
918     }
919 
920   return ARMul_DONE;
921 }
922 
923 static int
924 TMCRR (ARMul_State * state, ARMword instr)
925 {
926   ARMdword RdHi = state->Reg [BITS (16, 19)];
927   ARMword  RdLo = state->Reg [BITS (12, 15)];
928 
929   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
930     return ARMul_CANT;
931 
932 #ifdef DEBUG
933   fprintf (stderr, "tmcrr\n");
934 #endif
935 
936   if ((BITS (16, 19) == 15) || (BITS (12, 15) == 15))
937     return ARMul_CANT;
938 
939   wR [BITS (0, 3)] = (RdHi << 32) | RdLo;
940 
941   wC [wCon] |= WCON_MUP;
942 
943   return ARMul_DONE;
944 }
945 
946 static int
947 TMIA (ARMul_State * state, ARMword instr)
948 {
949   signed long long a, b;
950 
951   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
952     return ARMul_CANT;
953 
954 #ifdef DEBUG
955   fprintf (stderr, "tmia\n");
956 #endif
957 
958   if ((BITS (0, 3) == 15) || (BITS (12, 15) == 15))
959     {
960       ARMul_UndefInstr (state, instr);
961       return ARMul_DONE;
962     }
963 
964   a = state->Reg [BITS (0, 3)];
965   b = state->Reg [BITS (12, 15)];
966 
967   a = EXTEND32 (a);
968   b = EXTEND32 (b);
969 
970   wR [BITS (5, 8)] += a * b;
971   wC [wCon] |= WCON_MUP;
972 
973   return ARMul_DONE;
974 }
975 
976 static int
977 TMIAPH (ARMul_State * state, ARMword instr)
978 {
979   signed long a, b, result;
980   signed long long r;
981   ARMword Rm = state->Reg [BITS (0, 3)];
982   ARMword Rs = state->Reg [BITS (12, 15)];
983 
984   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
985     return ARMul_CANT;
986 
987 #ifdef DEBUG
988   fprintf (stderr, "tmiaph\n");
989 #endif
990 
991   if (BITS (0, 3) == 15 || BITS (12, 15) == 15)
992     {
993       ARMul_UndefInstr (state, instr);
994       return ARMul_DONE;
995     }
996 
997   a = SUBSTR (Rs, ARMword, 16, 31);
998   b = SUBSTR (Rm, ARMword, 16, 31);
999 
1000   a = EXTEND16 (a);
1001   b = EXTEND16 (b);
1002 
1003   result = a * b;
1004 
1005   r = result;
1006   r = EXTEND32 (r);
1007 
1008   wR [BITS (5, 8)] += r;
1009 
1010   a = SUBSTR (Rs, ARMword,  0, 15);
1011   b = SUBSTR (Rm, ARMword,  0, 15);
1012 
1013   a = EXTEND16 (a);
1014   b = EXTEND16 (b);
1015 
1016   result = a * b;
1017 
1018   r = result;
1019   r = EXTEND32 (r);
1020 
1021   wR [BITS (5, 8)] += r;
1022   wC [wCon] |= WCON_MUP;
1023 
1024   return ARMul_DONE;
1025 }
1026 
1027 static int
1028 TMIAxy (ARMul_State * state, ARMword instr)
1029 {
1030   ARMword Rm;
1031   ARMword Rs;
1032   long long temp;
1033 
1034   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1035     return ARMul_CANT;
1036 
1037 #ifdef DEBUG
1038   fprintf (stderr, "tmiaxy\n");
1039 #endif
1040 
1041   if (BITS (0, 3) == 15 || BITS (12, 15) == 15)
1042     {
1043       ARMul_UndefInstr (state, instr);
1044       return ARMul_DONE;
1045     }
1046 
1047   Rm = state->Reg [BITS (0, 3)];
1048   if (BIT (17))
1049     Rm >>= 16;
1050   else
1051     Rm &= 0xffff;
1052 
1053   Rs = state->Reg [BITS (12, 15)];
1054   if (BIT (16))
1055     Rs >>= 16;
1056   else
1057     Rs &= 0xffff;
1058 
1059   if (Rm & (1 << 15))
1060     Rm -= 1 << 16;
1061 
1062   if (Rs & (1 << 15))
1063     Rs -= 1 << 16;
1064 
1065   Rm *= Rs;
1066   temp = Rm;
1067 
1068   if (temp & (1 << 31))
1069     temp -= 1ULL << 32;
1070 
1071   wR [BITS (5, 8)] += temp;
1072   wC [wCon] |= WCON_MUP;
1073 
1074   return ARMul_DONE;
1075 }
1076 
1077 static int
1078 TMOVMSK (ARMul_State * state, ARMword instr)
1079 {
1080   ARMdword result;
1081   int      wRn;
1082 
1083   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1084     return ARMul_CANT;
1085 
1086 #ifdef DEBUG
1087   fprintf (stderr, "tmovmsk\n");
1088 #endif
1089 
1090   /* The CRm field must be r0.  */
1091   if (BITS (0, 3) != 0)
1092     return ARMul_CANT;
1093 
1094   wRn = BITS (16, 19);
1095 
1096   switch (BITS (22, 23))
1097     {
1098     case Bqual:
1099       result = (  (wRBITS (wRn, 63, 63) << 7)
1100 		| (wRBITS (wRn, 55, 55) << 6)
1101 		| (wRBITS (wRn, 47, 47) << 5)
1102 		| (wRBITS (wRn, 39, 39) << 4)
1103 		| (wRBITS (wRn, 31, 31) << 3)
1104 		| (wRBITS (wRn, 23, 23) << 2)
1105 		| (wRBITS (wRn, 15, 15) << 1)
1106 		| (wRBITS (wRn,  7,  7) << 0));
1107       break;
1108 
1109     case Hqual:
1110       result = (  (wRBITS (wRn, 63, 63) << 3)
1111 		| (wRBITS (wRn, 47, 47) << 2)
1112 		| (wRBITS (wRn, 31, 31) << 1)
1113 		| (wRBITS (wRn, 15, 15) << 0));
1114       break;
1115 
1116     case Wqual:
1117       result = (wRBITS (wRn, 63, 63) << 1) | wRBITS (wRn, 31, 31);
1118       break;
1119 
1120     default:
1121       ARMul_UndefInstr (state, instr);
1122       return ARMul_DONE;
1123     }
1124 
1125   state->Reg [BITS (12, 15)] = result;
1126 
1127   return ARMul_DONE;
1128 }
1129 
1130 static int
1131 TMRC (ARMul_State * state, ARMword instr)
1132 {
1133   int reg = BITS (12, 15);
1134 
1135   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1136     return ARMul_CANT;
1137 
1138 #ifdef DEBUG
1139   fprintf (stderr, "tmrc\n");
1140 #endif
1141 
1142   if (BITS (0, 3) != 0)
1143     return ARMul_CANT;
1144 
1145   if (reg == 15)
1146     ARMul_UndefInstr (state, instr);
1147   else
1148     state->Reg [reg] = wC [BITS (16, 19)];
1149 
1150   return ARMul_DONE;
1151 }
1152 
1153 static int
1154 TMRRC (ARMul_State * state, ARMword instr)
1155 {
1156   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1157     return ARMul_CANT;
1158 
1159 #ifdef DEBUG
1160   fprintf (stderr, "tmrrc\n");
1161 #endif
1162 
1163   if ((BITS (16, 19) == 15) || (BITS (12, 15) == 15) || (BITS (4, 11) != 0))
1164     ARMul_UndefInstr (state, instr);
1165   else
1166     {
1167       state->Reg [BITS (16, 19)] = wRBITS (BITS (0, 3), 32, 63);
1168       state->Reg [BITS (12, 15)] = wRBITS (BITS (0, 3),  0, 31);
1169     }
1170 
1171   return ARMul_DONE;
1172 }
1173 
1174 static int
1175 TORC (ARMul_State * state, ARMword instr)
1176 {
1177   ARMword cpsr = ARMul_GetCPSR (state);
1178 
1179   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1180     return ARMul_CANT;
1181 
1182 #ifdef DEBUG
1183   fprintf (stderr, "torc\n");
1184 #endif
1185 
1186   /* The Rd field must be r15.  */
1187   if (BITS (12, 15) != 15)
1188     return ARMul_CANT;
1189 
1190   /* The CRn field must be r3.  */
1191   if (BITS (16, 19) != 3)
1192     return ARMul_CANT;
1193 
1194   /* The CRm field must be r0.  */
1195   if (BITS (0, 3) != 0)
1196     return ARMul_CANT;
1197 
1198   cpsr &= 0x0fffffff;
1199 
1200   switch (BITS (22, 23))
1201     {
1202     case Bqual:
1203       cpsr |= (  (wCBITS (wCASF, 28, 31) | wCBITS (wCASF, 24, 27)
1204 		| wCBITS (wCASF, 20, 23) | wCBITS (wCASF, 16, 19)
1205 		| wCBITS (wCASF, 12, 15) | wCBITS (wCASF,  8, 11)
1206 		| wCBITS (wCASF,  4,  7) | wCBITS (wCASF,  0,  3)) << 28);
1207       break;
1208 
1209     case Hqual:
1210       cpsr |= (  (wCBITS (wCASF, 28, 31) | wCBITS (wCASF, 20, 23)
1211 		| wCBITS (wCASF, 12, 15) | wCBITS (wCASF,  4,  7)) << 28);
1212       break;
1213 
1214     case Wqual:
1215       cpsr |= ((wCBITS (wCASF, 28, 31) | wCBITS (wCASF, 12, 15)) << 28);
1216       break;
1217 
1218     default:
1219       ARMul_UndefInstr (state, instr);
1220       return ARMul_DONE;
1221     }
1222 
1223   ARMul_SetCPSR (state, cpsr);
1224 
1225   return ARMul_DONE;
1226 }
1227 
1228 static int
1229 WACC (ARMul_State * state, ARMword instr)
1230 {
1231   int wRn;
1232 
1233   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1234     return ARMul_CANT;
1235 
1236 #ifdef DEBUG
1237   fprintf (stderr, "wacc\n");
1238 #endif
1239 
1240   wRn = BITS (16, 19);
1241 
1242   switch (BITS (22, 23))
1243     {
1244     case Bqual:
1245       wR [BITS (12, 15)] =
1246 	  wRBITS (wRn, 56, 63) + wRBITS (wRn, 48, 55)
1247 	+ wRBITS (wRn, 40, 47) + wRBITS (wRn, 32, 39)
1248 	+ wRBITS (wRn, 24, 31) + wRBITS (wRn, 16, 23)
1249 	+ wRBITS (wRn,  8, 15) + wRBITS (wRn,  0,  7);
1250       break;
1251 
1252     case Hqual:
1253       wR [BITS (12, 15)] =
1254 	  wRBITS (wRn, 48, 63) + wRBITS (wRn, 32, 47)
1255 	+ wRBITS (wRn, 16, 31) + wRBITS (wRn,  0, 15);
1256       break;
1257 
1258     case Wqual:
1259       wR [BITS (12, 15)] = wRBITS (wRn, 32, 63) + wRBITS (wRn, 0, 31);
1260       break;
1261 
1262     default:
1263       ARMul_UndefInstr (state, instr);
1264       break;
1265     }
1266 
1267   wC [wCon] |= WCON_MUP;
1268   return ARMul_DONE;
1269 }
1270 
1271 static int
1272 WADD (ARMul_State * state, ARMword instr)
1273 {
1274   ARMdword r = 0;
1275   ARMdword x;
1276   ARMdword s;
1277   ARMword  psr = 0;
1278   int      i;
1279   int      carry;
1280   int      overflow;
1281   int      satrv[8];
1282 
1283   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1284     return ARMul_CANT;
1285 
1286 #ifdef DEBUG
1287   fprintf (stderr, "wadd\n");
1288 #endif
1289 
1290   /* Add two numbers using the specified function,
1291      leaving setting the carry bit as required.  */
1292 #define ADDx(x, y, m, f) \
1293    (*f) (wRBITS (BITS (16, 19), (x), (y)) & (m), \
1294          wRBITS (BITS ( 0,  3), (x), (y)) & (m), \
1295         & carry, & overflow)
1296 
1297   switch (BITS (22, 23))
1298     {
1299     case Bqual:
1300       for (i = 0; i < 8; i++)
1301         {
1302 	  switch (BITS (20, 21))
1303 	    {
1304 	    case NoSaturation:
1305 	      s = ADDx ((i * 8), (i * 8) + 7, 0xff, AddS8);
1306 	      satrv [BITIDX8 (i)] = 0;
1307 	      r |= (s & 0xff) << (i * 8);
1308 	      SIMD8_SET (psr, NBIT8 (s), SIMD_NBIT, i);
1309 	      SIMD8_SET (psr, ZBIT8 (s), SIMD_ZBIT, i);
1310 	      SIMD8_SET (psr, carry,     SIMD_CBIT, i);
1311 	      SIMD8_SET (psr, overflow,  SIMD_VBIT, i);
1312 	      break;
1313 
1314 	    case UnsignedSaturation:
1315 	      s = ADDx ((i * 8), (i * 8) + 7, 0xff, AddU8);
1316 	      x = IwmmxtSaturateU8 (s, satrv + BITIDX8 (i));
1317 	      r |= (x & 0xff) << (i * 8);
1318 	      SIMD8_SET (psr, NBIT8 (x), SIMD_NBIT, i);
1319 	      SIMD8_SET (psr, ZBIT8 (x), SIMD_ZBIT, i);
1320 	      if (! satrv [BITIDX8 (i)])
1321 		{
1322 		  SIMD8_SET (psr, carry,    SIMD_CBIT, i);
1323 		  SIMD8_SET (psr, overflow, SIMD_VBIT, i);
1324 		}
1325 	      break;
1326 
1327 	    case SignedSaturation:
1328 	      s = ADDx ((i * 8), (i * 8) + 7, 0xff, AddS8);
1329 	      x = IwmmxtSaturateS8 (s, satrv + BITIDX8 (i));
1330 	      r |= (x & 0xff) << (i * 8);
1331 	      SIMD8_SET (psr, NBIT8 (x), SIMD_NBIT, i);
1332 	      SIMD8_SET (psr, ZBIT8 (x), SIMD_ZBIT, i);
1333 	      if (! satrv [BITIDX8 (i)])
1334 		{
1335 		  SIMD8_SET (psr, carry,    SIMD_CBIT, i);
1336 		  SIMD8_SET (psr, overflow, SIMD_VBIT, i);
1337 		}
1338 	      break;
1339 
1340 	    default:
1341 	      ARMul_UndefInstr (state, instr);
1342 	      return ARMul_DONE;
1343 	    }
1344 	}
1345       break;
1346 
1347     case Hqual:
1348       satrv[0] = satrv[2] = satrv[4] = satrv[6] = 0;
1349 
1350       for (i = 0; i < 4; i++)
1351 	{
1352 	  switch (BITS (20, 21))
1353 	    {
1354 	    case NoSaturation:
1355 	      s = ADDx ((i * 16), (i * 16) + 15, 0xffff, AddS16);
1356 	      satrv [BITIDX16 (i)] = 0;
1357 	      r |= (s & 0xffff) << (i * 16);
1358 	      SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
1359 	      SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
1360 	      SIMD16_SET (psr, carry,      SIMD_CBIT, i);
1361 	      SIMD16_SET (psr, overflow,   SIMD_VBIT, i);
1362 	      break;
1363 
1364 	    case UnsignedSaturation:
1365 	      s = ADDx ((i * 16), (i * 16) + 15, 0xffff, AddU16);
1366 	      x = IwmmxtSaturateU16 (s, satrv + BITIDX16 (i));
1367 	      r |= (x & 0xffff) << (i * 16);
1368 	      SIMD16_SET (psr, NBIT16 (x), SIMD_NBIT, i);
1369 	      SIMD16_SET (psr, ZBIT16 (x), SIMD_ZBIT, i);
1370 	      if (! satrv [BITIDX16 (i)])
1371 		{
1372 		  SIMD16_SET (psr, carry,    SIMD_CBIT, i);
1373 		  SIMD16_SET (psr, overflow, SIMD_VBIT, i);
1374 		}
1375 	      break;
1376 
1377 	    case SignedSaturation:
1378 	      s = ADDx ((i * 16), (i * 16) + 15, 0xffff, AddS16);
1379 	      x = IwmmxtSaturateS16 (s, satrv + BITIDX16 (i));
1380 	      r |= (x & 0xffff) << (i * 16);
1381 	      SIMD16_SET (psr, NBIT16 (x), SIMD_NBIT, i);
1382 	      SIMD16_SET (psr, ZBIT16 (x), SIMD_ZBIT, i);
1383 	      if (! satrv [BITIDX16 (i)])
1384 		{
1385 		  SIMD16_SET (psr, carry,    SIMD_CBIT, i);
1386 		  SIMD16_SET (psr, overflow, SIMD_VBIT, i);
1387 		}
1388 	      break;
1389 
1390 	    default:
1391 	      ARMul_UndefInstr (state, instr);
1392 	      return ARMul_DONE;
1393 	    }
1394 	}
1395       break;
1396 
1397     case Wqual:
1398       satrv[0] = satrv[1] = satrv[2] = satrv[4] = satrv[5] = satrv[6] = 0;
1399 
1400       for (i = 0; i < 2; i++)
1401 	{
1402 	  switch (BITS (20, 21))
1403 	    {
1404 	    case NoSaturation:
1405 	      s = ADDx ((i * 32), (i * 32) + 31, 0xffffffff, AddS32);
1406 	      satrv [BITIDX32 (i)] = 0;
1407 	      r |= (s & 0xffffffff) << (i * 32);
1408 	      SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
1409 	      SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
1410 	      SIMD32_SET (psr, carry,      SIMD_CBIT, i);
1411 	      SIMD32_SET (psr, overflow,   SIMD_VBIT, i);
1412 	      break;
1413 
1414 	    case UnsignedSaturation:
1415 	      s = ADDx ((i * 32), (i * 32) + 31, 0xffffffff, AddU32);
1416 	      x = IwmmxtSaturateU32 (s, satrv + BITIDX32 (i));
1417 	      r |= (x & 0xffffffff) << (i * 32);
1418 	      SIMD32_SET (psr, NBIT32 (x), SIMD_NBIT, i);
1419 	      SIMD32_SET (psr, ZBIT32 (x), SIMD_ZBIT, i);
1420 	      if (! satrv [BITIDX32 (i)])
1421 		{
1422 		  SIMD32_SET (psr, carry,    SIMD_CBIT, i);
1423 		  SIMD32_SET (psr, overflow, SIMD_VBIT, i);
1424 		}
1425 	      break;
1426 
1427 	    case SignedSaturation:
1428 	      s = ADDx ((i * 32), (i * 32) + 31, 0xffffffff, AddS32);
1429 	      x = IwmmxtSaturateS32 (s, satrv + BITIDX32 (i));
1430 	      r |= (x & 0xffffffff) << (i * 32);
1431 	      SIMD32_SET (psr, NBIT32 (x), SIMD_NBIT, i);
1432 	      SIMD32_SET (psr, ZBIT32 (x), SIMD_ZBIT, i);
1433 	      if (! satrv [BITIDX32 (i)])
1434 		{
1435 		  SIMD32_SET (psr, carry,    SIMD_CBIT, i);
1436 		  SIMD32_SET (psr, overflow, SIMD_VBIT, i);
1437 		}
1438 	      break;
1439 
1440 	    default:
1441 	      ARMul_UndefInstr (state, instr);
1442 	      return ARMul_DONE;
1443 	    }
1444 	}
1445       break;
1446 
1447     default:
1448       ARMul_UndefInstr (state, instr);
1449       return ARMul_DONE;
1450     }
1451 
1452   wC [wCASF] = psr;
1453   wR [BITS (12, 15)] = r;
1454   wC [wCon] |= (WCON_MUP | WCON_CUP);
1455 
1456   SET_wCSSFvec (satrv);
1457 
1458 #undef ADDx
1459 
1460   return ARMul_DONE;
1461 }
1462 
1463 static int
1464 WALIGNI (ARMword instr)
1465 {
1466   int shift = BITS (20, 22) * 8;
1467 
1468   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1469     return ARMul_CANT;
1470 
1471 #ifdef DEBUG
1472   fprintf (stderr, "waligni\n");
1473 #endif
1474 
1475   if (shift)
1476     wR [BITS (12, 15)] =
1477       wRBITS (BITS (16, 19), shift, 63)
1478       | (wRBITS (BITS (0, 3), 0, shift) << ((64 - shift)));
1479   else
1480     wR [BITS (12, 15)] = wR [BITS (16, 19)];
1481 
1482   wC [wCon] |= WCON_MUP;
1483   return ARMul_DONE;
1484 }
1485 
1486 static int
1487 WALIGNR (ARMul_State * state, ARMword instr)
1488 {
1489   int shift = (wC [BITS (20, 21) + 8] & 0x7) * 8;
1490 
1491   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1492     return ARMul_CANT;
1493 
1494 #ifdef DEBUG
1495   fprintf (stderr, "walignr\n");
1496 #endif
1497 
1498   if (shift)
1499     wR [BITS (12, 15)] =
1500       wRBITS (BITS (16, 19), shift, 63)
1501       | (wRBITS (BITS (0, 3), 0, shift) << ((64 - shift)));
1502   else
1503     wR [BITS (12, 15)] = wR [BITS (16, 19)];
1504 
1505   wC [wCon] |= WCON_MUP;
1506   return ARMul_DONE;
1507 }
1508 
1509 static int
1510 WAND (ARMword instr)
1511 {
1512   ARMdword result;
1513   ARMword  psr = 0;
1514 
1515   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1516     return ARMul_CANT;
1517 
1518 #ifdef DEBUG
1519   fprintf (stderr, "wand\n");
1520 #endif
1521 
1522   result = wR [BITS (16, 19)] & wR [BITS (0, 3)];
1523   wR [BITS (12, 15)] = result;
1524 
1525   SIMD64_SET (psr, (result == 0), SIMD_ZBIT);
1526   SIMD64_SET (psr, (result & (1ULL << 63)), SIMD_NBIT);
1527 
1528   wC [wCASF] = psr;
1529   wC [wCon] |= (WCON_CUP | WCON_MUP);
1530 
1531   return ARMul_DONE;
1532 }
1533 
1534 static int
1535 WANDN (ARMword instr)
1536 {
1537   ARMdword result;
1538   ARMword  psr = 0;
1539 
1540   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1541     return ARMul_CANT;
1542 
1543 #ifdef DEBUG
1544   fprintf (stderr, "wandn\n");
1545 #endif
1546 
1547   result = wR [BITS (16, 19)] & ~ wR [BITS (0, 3)];
1548   wR [BITS (12, 15)] = result;
1549 
1550   SIMD64_SET (psr, (result == 0), SIMD_ZBIT);
1551   SIMD64_SET (psr, (result & (1ULL << 63)), SIMD_NBIT);
1552 
1553   wC [wCASF] = psr;
1554   wC [wCon] |= (WCON_CUP | WCON_MUP);
1555 
1556   return ARMul_DONE;
1557 }
1558 
1559 static int
1560 WAVG2 (ARMword instr)
1561 {
1562   ARMdword r = 0;
1563   ARMword  psr = 0;
1564   ARMdword s;
1565   int      i;
1566   int      round = BIT (20) ? 1 : 0;
1567 
1568   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1569     return ARMul_CANT;
1570 
1571 #ifdef DEBUG
1572   fprintf (stderr, "wavg2\n");
1573 #endif
1574 
1575 #define AVG2x(x, y, m) (((wRBITS (BITS (16, 19), (x), (y)) & (m)) \
1576 		       + (wRBITS (BITS ( 0,  3), (x), (y)) & (m)) \
1577 		       + round) / 2)
1578 
1579   if (BIT (22))
1580     {
1581       for (i = 0; i < 4; i++)
1582 	{
1583 	  s = AVG2x ((i * 16), (i * 16) + 15, 0xffff) & 0xffff;
1584 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
1585 	  r |= s << (i * 16);
1586 	}
1587     }
1588   else
1589     {
1590       for (i = 0; i < 8; i++)
1591 	{
1592 	  s = AVG2x ((i * 8), (i * 8) + 7, 0xff) & 0xff;
1593 	  SIMD8_SET (psr, ZBIT8 (s), SIMD_ZBIT, i);
1594 	  r |= s << (i * 8);
1595 	}
1596     }
1597 
1598   wR [BITS (12, 15)] = r;
1599   wC [wCASF] = psr;
1600   wC [wCon] |= (WCON_CUP | WCON_MUP);
1601 
1602   return ARMul_DONE;
1603 }
1604 
1605 static int
1606 WCMPEQ (ARMul_State * state, ARMword instr)
1607 {
1608   ARMdword r = 0;
1609   ARMword  psr = 0;
1610   ARMdword s;
1611   int      i;
1612 
1613   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1614     return ARMul_CANT;
1615 
1616 #ifdef DEBUG
1617   fprintf (stderr, "wcmpeq\n");
1618 #endif
1619 
1620   switch (BITS (22, 23))
1621     {
1622     case Bqual:
1623       for (i = 0; i < 8; i++)
1624 	{
1625 	  s = wRBYTE (BITS (16, 19), i) == wRBYTE (BITS (0, 3), i) ? 0xff : 0;
1626 	  r |= s << (i * 8);
1627 	  SIMD8_SET (psr, NBIT8 (s), SIMD_NBIT, i);
1628 	  SIMD8_SET (psr, ZBIT8 (s), SIMD_ZBIT, i);
1629 	}
1630       break;
1631 
1632     case Hqual:
1633       for (i = 0; i < 4; i++)
1634 	{
1635 	  s = wRHALF (BITS (16, 19), i) == wRHALF (BITS (0, 3), i) ? 0xffff : 0;
1636 	  r |= s << (i * 16);
1637 	  SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
1638 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
1639 	}
1640       break;
1641 
1642     case Wqual:
1643       for (i = 0; i < 2; i++)
1644 	{
1645 	  s = wRWORD (BITS (16, 19), i) == wRWORD (BITS (0, 3), i) ? 0xffffffff : 0;
1646 	  r |= s << (i * 32);
1647 	  SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
1648 	  SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
1649 	}
1650       break;
1651 
1652     default:
1653       ARMul_UndefInstr (state, instr);
1654       return ARMul_DONE;
1655     }
1656 
1657   wC [wCASF] = psr;
1658   wR [BITS (12, 15)] = r;
1659   wC [wCon] |= (WCON_CUP | WCON_MUP);
1660 
1661   return ARMul_DONE;
1662 }
1663 
1664 static int
1665 WCMPGT (ARMul_State * state, ARMword instr)
1666 {
1667   ARMdword r = 0;
1668   ARMword  psr = 0;
1669   ARMdword s;
1670   int      i;
1671 
1672   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
1673     return ARMul_CANT;
1674 
1675 #ifdef DEBUG
1676   fprintf (stderr, "wcmpgt\n");
1677 #endif
1678 
1679   switch (BITS (22, 23))
1680     {
1681     case Bqual:
1682       if (BIT (21))
1683 	{
1684 	  /* Use a signed comparison.  */
1685 	  for (i = 0; i < 8; i++)
1686 	    {
1687 	      signed char a, b;
1688 
1689 	      a = wRBYTE (BITS (16, 19), i);
1690 	      b = wRBYTE (BITS (0, 3), i);
1691 
1692 	      s = (a > b) ? 0xff : 0;
1693 	      r |= s << (i * 8);
1694 	      SIMD8_SET (psr, NBIT8 (s), SIMD_NBIT, i);
1695 	      SIMD8_SET (psr, ZBIT8 (s), SIMD_ZBIT, i);
1696 	    }
1697 	}
1698       else
1699 	{
1700 	  for (i = 0; i < 8; i++)
1701 	    {
1702 	      s = (wRBYTE (BITS (16, 19), i) > wRBYTE (BITS (0, 3), i))
1703 		? 0xff : 0;
1704 	      r |= s << (i * 8);
1705 	      SIMD8_SET (psr, NBIT8 (s), SIMD_NBIT, i);
1706 	      SIMD8_SET (psr, ZBIT8 (s), SIMD_ZBIT, i);
1707 	    }
1708 	}
1709       break;
1710 
1711     case Hqual:
1712       if (BIT (21))
1713 	{
1714 	  for (i = 0; i < 4; i++)
1715 	    {
1716 	      signed int a, b;
1717 
1718 	      a = wRHALF (BITS (16, 19), i);
1719 	      a = EXTEND16 (a);
1720 
1721 	      b = wRHALF (BITS (0, 3), i);
1722 	      b = EXTEND16 (b);
1723 
1724 	      s = (a > b) ? 0xffff : 0;
1725 	      r |= s << (i * 16);
1726 	      SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
1727 	      SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
1728 	    }
1729 	}
1730       else
1731 	{
1732 	  for (i = 0; i < 4; i++)
1733 	    {
1734 	      s = (wRHALF (BITS (16, 19), i) > wRHALF (BITS (0, 3), i))
1735 		? 0xffff : 0;
1736 	      r |= s << (i * 16);
1737 	      SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
1738 	      SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
1739 	    }
1740 	}
1741       break;
1742 
1743     case Wqual:
1744       if (BIT (21))
1745 	{
1746 	  for (i = 0; i < 2; i++)
1747 	    {
1748 	      signed long a, b;
1749 
1750 	      a = EXTEND32 (wRWORD (BITS (16, 19), i));
1751 	      b = EXTEND32 (wRWORD (BITS (0, 3), i));
1752 
1753 	      s = (a > b) ? 0xffffffff : 0;
1754 	      r |= s << (i * 32);
1755 
1756 	      SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
1757 	      SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
1758 	    }
1759 	}
1760       else
1761 	{
1762 	  for (i = 0; i < 2; i++)
1763 	    {
1764 	      s = (wRWORD (BITS (16, 19), i) > wRWORD (BITS (0, 3), i))
1765 		? 0xffffffff : 0;
1766 	      r |= s << (i * 32);
1767 	      SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
1768 	      SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
1769 	    }
1770 	}
1771       break;
1772 
1773     default:
1774       ARMul_UndefInstr (state, instr);
1775       return ARMul_DONE;
1776     }
1777 
1778   wC [wCASF] = psr;
1779   wR [BITS (12, 15)] = r;
1780   wC [wCon] |= (WCON_CUP | WCON_MUP);
1781 
1782   return ARMul_DONE;
1783 }
1784 
1785 static ARMword
1786 Compute_Iwmmxt_Address (ARMul_State * state, ARMword instr, int * pFailed)
1787 {
1788   ARMword  Rn;
1789   ARMword  addr;
1790   ARMword  offset;
1791   ARMword  multiplier;
1792 
1793   * pFailed  = 0;
1794   Rn         = BITS (16, 19);
1795   addr       = state->Reg [Rn];
1796   offset     = BITS (0, 7);
1797   multiplier = BIT (8) ? 4 : 1;
1798 
1799   if (BIT (24)) /* P */
1800     {
1801       /* Pre Indexed Addressing.  */
1802       if (BIT (23))
1803 	addr += offset * multiplier;
1804       else
1805 	addr -= offset * multiplier;
1806 
1807       /* Immediate Pre-Indexed.  */
1808       if (BIT (21)) /* W */
1809 	{
1810 	  if (Rn == 15)
1811 	    {
1812 	      /* Writeback into R15 is UNPREDICTABLE.  */
1813 #ifdef DEBUG
1814 	      fprintf (stderr, "iWMMXt: writeback into r15\n");
1815 #endif
1816 	      * pFailed = 1;
1817 	    }
1818 	  else
1819 	    state->Reg [Rn] = addr;
1820 	}
1821     }
1822   else
1823     {
1824       /* Post Indexed Addressing.  */
1825       if (BIT (21)) /* W */
1826 	{
1827 	  /* Handle the write back of the final address.  */
1828 	  if (Rn == 15)
1829 	    {
1830 	      /* Writeback into R15 is UNPREDICTABLE.  */
1831 #ifdef DEBUG
1832 	      fprintf (stderr, "iWMMXt: writeback into r15\n");
1833 #endif
1834 	      * pFailed = 1;
1835 	    }
1836 	  else
1837 	    {
1838 	      ARMword  increment;
1839 
1840 	      if (BIT (23))
1841 		increment = offset * multiplier;
1842 	      else
1843 		increment = - (offset * multiplier);
1844 
1845 	      state->Reg [Rn] = addr + increment;
1846 	    }
1847 	}
1848       else
1849 	{
1850 	  /* P == 0, W == 0, U == 0 is UNPREDICTABLE.  */
1851 	  if (BIT (23) == 0)
1852 	    {
1853 #ifdef DEBUG
1854 	      fprintf (stderr, "iWMMXt: undefined addressing mode\n");
1855 #endif
1856 	      * pFailed = 1;
1857 	    }
1858 	}
1859     }
1860 
1861   return addr;
1862 }
1863 
1864 static ARMdword
1865 Iwmmxt_Load_Double_Word (ARMul_State * state, ARMword address)
1866 {
1867   ARMdword value;
1868 
1869   /* The address must be aligned on a 8 byte boundary.  */
1870   if (address & 0x7)
1871     {
1872       fprintf (stderr, "iWMMXt: At addr 0x%x: Unaligned double word load from 0x%x\n",
1873 	       (state->Reg[15] - 8) & ~0x3, address);
1874 #ifdef DEBUG
1875 #endif
1876       /* No need to check for alignment traps.  An unaligned
1877 	 double word load with alignment trapping disabled is
1878 	 UNPREDICTABLE.  */
1879       ARMul_Abort (state, ARMul_DataAbortV);
1880     }
1881 
1882   /* Load the words.  */
1883   if (! state->bigendSig)
1884     {
1885       value = ARMul_LoadWordN (state, address + 4);
1886       value <<= 32;
1887       value |= ARMul_LoadWordN (state, address);
1888     }
1889   else
1890     {
1891       value = ARMul_LoadWordN (state, address);
1892       value <<= 32;
1893       value |= ARMul_LoadWordN (state, address + 4);
1894     }
1895 
1896   /* Check for data aborts.  */
1897   if (state->Aborted)
1898     ARMul_Abort (state, ARMul_DataAbortV);
1899   else
1900     ARMul_Icycles (state, 2, 0L);
1901 
1902   return value;
1903 }
1904 
1905 static ARMword
1906 Iwmmxt_Load_Word (ARMul_State * state, ARMword address)
1907 {
1908   ARMword value;
1909 
1910   /* Check for a misaligned address.  */
1911   if (address & 3)
1912     {
1913       if ((read_cp15_reg (1, 0, 0) & ARMul_CP15_R1_ALIGN))
1914 	ARMul_Abort (state, ARMul_DataAbortV);
1915       else
1916 	address &= ~ 3;
1917     }
1918 
1919   value = ARMul_LoadWordN (state, address);
1920 
1921   if (state->Aborted)
1922     ARMul_Abort (state, ARMul_DataAbortV);
1923   else
1924     ARMul_Icycles (state, 1, 0L);
1925 
1926   return value;
1927 }
1928 
1929 static ARMword
1930 Iwmmxt_Load_Half_Word (ARMul_State * state, ARMword address)
1931 {
1932   ARMword value;
1933 
1934   /* Check for a misaligned address.  */
1935   if (address & 1)
1936     {
1937       if ((read_cp15_reg (1, 0, 0) & ARMul_CP15_R1_ALIGN))
1938 	ARMul_Abort (state, ARMul_DataAbortV);
1939       else
1940 	address &= ~ 1;
1941     }
1942 
1943   value = ARMul_LoadHalfWord (state, address);
1944 
1945   if (state->Aborted)
1946     ARMul_Abort (state, ARMul_DataAbortV);
1947   else
1948     ARMul_Icycles (state, 1, 0L);
1949 
1950   return value;
1951 }
1952 
1953 static ARMword
1954 Iwmmxt_Load_Byte (ARMul_State * state, ARMword address)
1955 {
1956   ARMword value;
1957 
1958   value = ARMul_LoadByte (state, address);
1959 
1960   if (state->Aborted)
1961     ARMul_Abort (state, ARMul_DataAbortV);
1962   else
1963     ARMul_Icycles (state, 1, 0L);
1964 
1965   return value;
1966 }
1967 
1968 static void
1969 Iwmmxt_Store_Double_Word (ARMul_State * state, ARMword address, ARMdword value)
1970 {
1971   /* The address must be aligned on a 8 byte boundary.  */
1972   if (address & 0x7)
1973     {
1974       fprintf (stderr, "iWMMXt: At addr 0x%x: Unaligned double word store to 0x%x\n",
1975 	       (state->Reg[15] - 8) & ~0x3, address);
1976 #ifdef DEBUG
1977 #endif
1978       /* No need to check for alignment traps.  An unaligned
1979 	 double word store with alignment trapping disabled is
1980 	 UNPREDICTABLE.  */
1981       ARMul_Abort (state, ARMul_DataAbortV);
1982     }
1983 
1984   /* Store the words.  */
1985   if (! state->bigendSig)
1986     {
1987       ARMul_StoreWordN (state, address, value);
1988       ARMul_StoreWordN (state, address + 4, value >> 32);
1989     }
1990   else
1991     {
1992       ARMul_StoreWordN (state, address + 4, value);
1993       ARMul_StoreWordN (state, address, value >> 32);
1994     }
1995 
1996   /* Check for data aborts.  */
1997   if (state->Aborted)
1998     ARMul_Abort (state, ARMul_DataAbortV);
1999   else
2000     ARMul_Icycles (state, 2, 0L);
2001 }
2002 
2003 static void
2004 Iwmmxt_Store_Word (ARMul_State * state, ARMword address, ARMword value)
2005 {
2006   /* Check for a misaligned address.  */
2007   if (address & 3)
2008     {
2009       if ((read_cp15_reg (1, 0, 0) & ARMul_CP15_R1_ALIGN))
2010 	ARMul_Abort (state, ARMul_DataAbortV);
2011       else
2012 	address &= ~ 3;
2013     }
2014 
2015   ARMul_StoreWordN (state, address, value);
2016 
2017   if (state->Aborted)
2018     ARMul_Abort (state, ARMul_DataAbortV);
2019 }
2020 
2021 static void
2022 Iwmmxt_Store_Half_Word (ARMul_State * state, ARMword address, ARMword value)
2023 {
2024   /* Check for a misaligned address.  */
2025   if (address & 1)
2026     {
2027       if ((read_cp15_reg (1, 0, 0) & ARMul_CP15_R1_ALIGN))
2028 	ARMul_Abort (state, ARMul_DataAbortV);
2029       else
2030 	address &= ~ 1;
2031     }
2032 
2033   ARMul_StoreHalfWord (state, address, value);
2034 
2035   if (state->Aborted)
2036     ARMul_Abort (state, ARMul_DataAbortV);
2037 }
2038 
2039 static void
2040 Iwmmxt_Store_Byte (ARMul_State * state, ARMword address, ARMword value)
2041 {
2042   ARMul_StoreByte (state, address, value);
2043 
2044   if (state->Aborted)
2045     ARMul_Abort (state, ARMul_DataAbortV);
2046 }
2047 
2048 static int
2049 WLDR (ARMul_State * state, ARMword instr)
2050 {
2051   ARMword address;
2052   int failed;
2053 
2054   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2055     return ARMul_CANT;
2056 
2057 #ifdef DEBUG
2058   fprintf (stderr, "wldr\n");
2059 #endif
2060 
2061   address = Compute_Iwmmxt_Address (state, instr, & failed);
2062   if (failed)
2063     return ARMul_CANT;
2064 
2065   if (BITS (28, 31) == 0xf)
2066     {
2067       /* WLDRW wCx */
2068       wC [BITS (12, 15)] = Iwmmxt_Load_Word (state, address);
2069     }
2070   else if (BIT (8) == 0)
2071     {
2072       if (BIT (22) == 0)
2073 	/* WLDRB */
2074 	wR [BITS (12, 15)] = Iwmmxt_Load_Byte (state, address);
2075       else
2076 	/* WLDRH */
2077 	wR [BITS (12, 15)] = Iwmmxt_Load_Half_Word (state, address);
2078     }
2079   else
2080     {
2081       if (BIT (22) == 0)
2082 	/* WLDRW wRd */
2083 	wR [BITS (12, 15)] = Iwmmxt_Load_Word (state, address);
2084       else
2085 	/* WLDRD */
2086 	wR [BITS (12, 15)] = Iwmmxt_Load_Double_Word (state, address);
2087     }
2088 
2089   wC [wCon] |= WCON_MUP;
2090 
2091   return ARMul_DONE;
2092 }
2093 
2094 static int
2095 WMAC (ARMword instr)
2096 {
2097   int      i;
2098   ARMdword t = 0;
2099   ARMword  a, b;
2100 
2101   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2102     return ARMul_CANT;
2103 
2104 #ifdef DEBUG
2105   fprintf (stderr, "wmac\n");
2106 #endif
2107 
2108   for (i = 0; i < 4; i++)
2109     {
2110       if (BIT (21))
2111         {
2112 	  /* Signed.  */
2113 	  signed long s;
2114 
2115 	  a = wRHALF (BITS (16, 19), i);
2116 	  a = EXTEND16 (a);
2117 
2118 	  b = wRHALF (BITS (0, 3), i);
2119 	  b = EXTEND16 (b);
2120 
2121 	  s = (signed long) a * (signed long) b;
2122 
2123 	  t = t + (ARMdword) s;
2124         }
2125       else
2126         {
2127 	  /* Unsigned.  */
2128 	  a = wRHALF (BITS (16, 19), i);
2129 	  b = wRHALF (BITS ( 0,  3), i);
2130 
2131 	  t += a * b;
2132         }
2133     }
2134 
2135   if (BIT (21))
2136     t = EXTEND32 (t);
2137   else
2138     t &= 0xffffffff;
2139 
2140   if (BIT (20))
2141     wR [BITS (12, 15)] = t;
2142   else
2143     wR[BITS (12, 15)] += t;
2144 
2145   wC [wCon] |= WCON_MUP;
2146 
2147   return ARMul_DONE;
2148 }
2149 
2150 static int
2151 WMADD (ARMword instr)
2152 {
2153   ARMdword r = 0;
2154   int i;
2155 
2156   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2157     return ARMul_CANT;
2158 
2159 #ifdef DEBUG
2160   fprintf (stderr, "wmadd\n");
2161 #endif
2162 
2163   for (i = 0; i < 2; i++)
2164     {
2165       ARMdword s1, s2;
2166 
2167       if (BIT (21))	/* Signed.  */
2168         {
2169 	  signed long a, b;
2170 
2171 	  a = wRHALF (BITS (16, 19), i * 2);
2172 	  a = EXTEND16 (a);
2173 
2174 	  b = wRHALF (BITS (0, 3), i * 2);
2175 	  b = EXTEND16 (b);
2176 
2177 	  s1 = (ARMdword) (a * b);
2178 
2179 	  a = wRHALF (BITS (16, 19), i * 2 + 1);
2180 	  a = EXTEND16 (a);
2181 
2182 	  b = wRHALF (BITS (0, 3), i * 2 + 1);
2183 	  b = EXTEND16 (b);
2184 
2185 	  s2 = (ARMdword) (a * b);
2186         }
2187       else			/* Unsigned.  */
2188         {
2189 	  unsigned long a, b;
2190 
2191 	  a = wRHALF (BITS (16, 19), i * 2);
2192 	  b = wRHALF (BITS ( 0,  3), i * 2);
2193 
2194 	  s1 = (ARMdword) (a * b);
2195 
2196 	  a = wRHALF (BITS (16, 19), i * 2 + 1);
2197 	  b = wRHALF (BITS ( 0,  3), i * 2 + 1);
2198 
2199 	  s2 = (ARMdword) a * b;
2200         }
2201 
2202       r |= (ARMdword) ((s1 + s2) & 0xffffffff) << (i ? 32 : 0);
2203     }
2204 
2205   wR [BITS (12, 15)] = r;
2206   wC [wCon] |= WCON_MUP;
2207 
2208   return ARMul_DONE;
2209 }
2210 
2211 static int
2212 WMAX (ARMul_State * state, ARMword instr)
2213 {
2214   ARMdword r = 0;
2215   ARMdword s;
2216   int      i;
2217 
2218   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2219     return ARMul_CANT;
2220 
2221 #ifdef DEBUG
2222   fprintf (stderr, "wmax\n");
2223 #endif
2224 
2225   switch (BITS (22, 23))
2226     {
2227     case Bqual:
2228       for (i = 0; i < 8; i++)
2229 	if (BIT (21))	/* Signed.  */
2230 	  {
2231 	    int a, b;
2232 
2233 	    a = wRBYTE (BITS (16, 19), i);
2234 	    a = EXTEND8 (a);
2235 
2236 	    b = wRBYTE (BITS (0, 3), i);
2237 	    b = EXTEND8 (b);
2238 
2239 	    if (a > b)
2240 	      s = a;
2241 	    else
2242 	      s = b;
2243 
2244 	    r |= (s & 0xff) << (i * 8);
2245 	  }
2246 	else	 	/* Unsigned.  */
2247 	  {
2248 	    unsigned int a, b;
2249 
2250 	    a = wRBYTE (BITS (16, 19), i);
2251 	    b = wRBYTE (BITS (0, 3), i);
2252 
2253 	    if (a > b)
2254 	      s = a;
2255 	    else
2256 	      s = b;
2257 
2258 	    r |= (s & 0xff) << (i * 8);
2259           }
2260       break;
2261 
2262     case Hqual:
2263       for (i = 0; i < 4; i++)
2264 	if (BIT (21))	/* Signed.  */
2265 	  {
2266 	    int a, b;
2267 
2268 	    a = wRHALF (BITS (16, 19), i);
2269 	    a = EXTEND16 (a);
2270 
2271 	    b = wRHALF (BITS (0, 3), i);
2272 	    b = EXTEND16 (b);
2273 
2274 	    if (a > b)
2275 	      s = a;
2276 	    else
2277 	      s = b;
2278 
2279 	    r |= (s & 0xffff) << (i * 16);
2280 	  }
2281 	else	 	/* Unsigned.  */
2282 	  {
2283 	    unsigned int a, b;
2284 
2285 	    a = wRHALF (BITS (16, 19), i);
2286 	    b = wRHALF (BITS (0, 3), i);
2287 
2288 	    if (a > b)
2289 	      s = a;
2290 	    else
2291 	      s = b;
2292 
2293 	    r |= (s & 0xffff) << (i * 16);
2294           }
2295       break;
2296 
2297     case Wqual:
2298       for (i = 0; i < 2; i++)
2299 	if (BIT (21))	/* Signed.  */
2300 	  {
2301 	    int a, b;
2302 
2303 	    a = wRWORD (BITS (16, 19), i);
2304 	    b = wRWORD (BITS (0, 3), i);
2305 
2306 	    if (a > b)
2307 	      s = a;
2308 	    else
2309 	      s = b;
2310 
2311 	    r |= (s & 0xffffffff) << (i * 32);
2312 	  }
2313 	else
2314 	  {
2315 	    unsigned int a, b;
2316 
2317 	    a = wRWORD (BITS (16, 19), i);
2318 	    b = wRWORD (BITS (0, 3), i);
2319 
2320 	    if (a > b)
2321 	      s = a;
2322 	    else
2323 	      s = b;
2324 
2325 	    r |= (s & 0xffffffff) << (i * 32);
2326           }
2327       break;
2328 
2329     default:
2330       ARMul_UndefInstr (state, instr);
2331       return ARMul_DONE;
2332     }
2333 
2334   wR [BITS (12, 15)] = r;
2335   wC [wCon] |= WCON_MUP;
2336 
2337   return ARMul_DONE;
2338 }
2339 
2340 static int
2341 WMIN (ARMul_State * state, ARMword instr)
2342 {
2343   ARMdword r = 0;
2344   ARMdword s;
2345   int      i;
2346 
2347   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2348     return ARMul_CANT;
2349 
2350 #ifdef DEBUG
2351   fprintf (stderr, "wmin\n");
2352 #endif
2353 
2354   switch (BITS (22, 23))
2355     {
2356     case Bqual:
2357       for (i = 0; i < 8; i++)
2358 	if (BIT (21))	/* Signed.  */
2359 	  {
2360 	    int a, b;
2361 
2362 	    a = wRBYTE (BITS (16, 19), i);
2363 	    a = EXTEND8 (a);
2364 
2365 	    b = wRBYTE (BITS (0, 3), i);
2366 	    b = EXTEND8 (b);
2367 
2368 	    if (a < b)
2369 	      s = a;
2370 	    else
2371 	      s = b;
2372 
2373 	    r |= (s & 0xff) << (i * 8);
2374 	  }
2375 	else	 	/* Unsigned.  */
2376 	  {
2377 	    unsigned int a, b;
2378 
2379 	    a = wRBYTE (BITS (16, 19), i);
2380 	    b = wRBYTE (BITS (0, 3), i);
2381 
2382 	    if (a < b)
2383 	      s = a;
2384 	    else
2385 	      s = b;
2386 
2387 	    r |= (s & 0xff) << (i * 8);
2388           }
2389       break;
2390 
2391     case Hqual:
2392       for (i = 0; i < 4; i++)
2393 	if (BIT (21))	/* Signed.  */
2394 	  {
2395 	    int a, b;
2396 
2397 	    a = wRHALF (BITS (16, 19), i);
2398 	    a = EXTEND16 (a);
2399 
2400 	    b = wRHALF (BITS (0, 3), i);
2401 	    b = EXTEND16 (b);
2402 
2403 	    if (a < b)
2404 	      s = a;
2405 	    else
2406 	      s = b;
2407 
2408 	    r |= (s & 0xffff) << (i * 16);
2409 	  }
2410 	else
2411 	  {
2412 	    /* Unsigned.  */
2413 	    unsigned int a, b;
2414 
2415 	    a = wRHALF (BITS (16, 19), i);
2416 	    b = wRHALF (BITS ( 0,  3), i);
2417 
2418 	    if (a < b)
2419 	      s = a;
2420 	    else
2421 	      s = b;
2422 
2423 	    r |= (s & 0xffff) << (i * 16);
2424           }
2425       break;
2426 
2427     case Wqual:
2428       for (i = 0; i < 2; i++)
2429 	if (BIT (21))	/* Signed.  */
2430 	  {
2431 	    int a, b;
2432 
2433 	    a = wRWORD (BITS (16, 19), i);
2434 	    b = wRWORD (BITS ( 0,  3), i);
2435 
2436 	    if (a < b)
2437 	      s = a;
2438 	    else
2439 	      s = b;
2440 
2441 	    r |= (s & 0xffffffff) << (i * 32);
2442 	  }
2443 	else
2444 	  {
2445 	    unsigned int a, b;
2446 
2447 	    a = wRWORD (BITS (16, 19), i);
2448 	    b = wRWORD (BITS (0, 3), i);
2449 
2450 	    if (a < b)
2451 	      s = a;
2452 	    else
2453 	      s = b;
2454 
2455 	    r |= (s & 0xffffffff) << (i * 32);
2456           }
2457       break;
2458 
2459     default:
2460       ARMul_UndefInstr (state, instr);
2461       return ARMul_DONE;
2462     }
2463 
2464   wR [BITS (12, 15)] = r;
2465   wC [wCon] |= WCON_MUP;
2466 
2467   return ARMul_DONE;
2468 }
2469 
2470 static int
2471 WMUL (ARMword instr)
2472 {
2473   ARMdword r = 0;
2474   ARMdword s;
2475   int      i;
2476 
2477   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2478     return ARMul_CANT;
2479 
2480 #ifdef DEBUG
2481   fprintf (stderr, "wmul\n");
2482 #endif
2483 
2484   for (i = 0; i < 4; i++)
2485     if (BIT (21))	/* Signed.  */
2486       {
2487 	long a, b;
2488 
2489 	a = wRHALF (BITS (16, 19), i);
2490 	a = EXTEND16 (a);
2491 
2492 	b = wRHALF (BITS (0, 3), i);
2493 	b = EXTEND16 (b);
2494 
2495 	s = a * b;
2496 
2497 	if (BIT (20))
2498 	  r |= ((s >> 16) & 0xffff) << (i * 16);
2499 	else
2500 	  r |= (s & 0xffff) << (i * 16);
2501       }
2502     else		/* Unsigned.  */
2503       {
2504 	unsigned long a, b;
2505 
2506 	a = wRHALF (BITS (16, 19), i);
2507 	b = wRHALF (BITS (0, 3), i);
2508 
2509 	s = a * b;
2510 
2511 	if (BIT (20))
2512 	  r |= ((s >> 16) & 0xffff) << (i * 16);
2513 	else
2514 	  r |= (s & 0xffff) << (i * 16);
2515       }
2516 
2517   wR [BITS (12, 15)] = r;
2518   wC [wCon] |= WCON_MUP;
2519 
2520   return ARMul_DONE;
2521 }
2522 
2523 static int
2524 WOR (ARMword instr)
2525 {
2526   ARMword psr = 0;
2527   ARMdword result;
2528 
2529   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2530     return ARMul_CANT;
2531 
2532 #ifdef DEBUG
2533   fprintf (stderr, "wor\n");
2534 #endif
2535 
2536   result = wR [BITS (16, 19)] | wR [BITS (0, 3)];
2537   wR [BITS (12, 15)] = result;
2538 
2539   SIMD64_SET (psr, (result == 0), SIMD_ZBIT);
2540   SIMD64_SET (psr, (result & (1ULL << 63)), SIMD_NBIT);
2541 
2542   wC [wCASF] = psr;
2543   wC [wCon] |= (WCON_CUP | WCON_MUP);
2544 
2545   return ARMul_DONE;
2546 }
2547 
2548 static int
2549 WPACK (ARMul_State * state, ARMword instr)
2550 {
2551   ARMdword r = 0;
2552   ARMword  psr = 0;
2553   ARMdword x;
2554   ARMdword s;
2555   int      i;
2556   int      satrv[8];
2557 
2558   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2559     return ARMul_CANT;
2560 
2561 #ifdef DEBUG
2562   fprintf (stderr, "wpack\n");
2563 #endif
2564 
2565   switch (BITS (22, 23))
2566     {
2567     case Hqual:
2568       for (i = 0; i < 8; i++)
2569 	{
2570 	  x = wRHALF (i < 4 ? BITS (16, 19) : BITS (0, 3), i & 3);
2571 
2572 	  switch (BITS (20, 21))
2573 	    {
2574 	    case UnsignedSaturation:
2575 	      s = IwmmxtSaturateU8 (x, satrv + BITIDX8 (i));
2576 	      break;
2577 
2578 	    case SignedSaturation:
2579 	      s = IwmmxtSaturateS8 (x, satrv + BITIDX8 (i));
2580 	      break;
2581 
2582 	    default:
2583 	      ARMul_UndefInstr (state, instr);
2584 	      return ARMul_DONE;
2585 	    }
2586 
2587 	  r |= (s & 0xff) << (i * 8);
2588 	  SIMD8_SET (psr, NBIT8 (s), SIMD_NBIT, i);
2589 	  SIMD8_SET (psr, ZBIT8 (s), SIMD_ZBIT, i);
2590 	}
2591       break;
2592 
2593     case Wqual:
2594       satrv[0] = satrv[2] = satrv[4] = satrv[6] = 0;
2595 
2596       for (i = 0; i < 4; i++)
2597 	{
2598 	  x = wRWORD (i < 2 ? BITS (16, 19) : BITS (0, 3), i & 1);
2599 
2600 	  switch (BITS (20, 21))
2601 	    {
2602 	    case UnsignedSaturation:
2603 	      s = IwmmxtSaturateU16 (x, satrv + BITIDX16 (i));
2604 	      break;
2605 
2606 	    case SignedSaturation:
2607 	      s = IwmmxtSaturateS16 (x, satrv + BITIDX16 (i));
2608 	      break;
2609 
2610 	    default:
2611 	      ARMul_UndefInstr (state, instr);
2612 	      return ARMul_DONE;
2613 	    }
2614 
2615 	  r |= (s & 0xffff) << (i * 16);
2616 	  SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
2617 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
2618 	}
2619       break;
2620 
2621     case Dqual:
2622       satrv[0] = satrv[1] = satrv[2] = satrv[4] = satrv[5] = satrv[6] = 0;
2623 
2624       for (i = 0; i < 2; i++)
2625 	{
2626 	  x = wR [i ? BITS (0, 3) : BITS (16, 19)];
2627 
2628 	  switch (BITS (20, 21))
2629 	    {
2630 	    case UnsignedSaturation:
2631 	      s = IwmmxtSaturateU32 (x, satrv + BITIDX32 (i));
2632 	      break;
2633 
2634 	    case SignedSaturation:
2635 	      s = IwmmxtSaturateS32 (x, satrv + BITIDX32 (i));
2636 	      break;
2637 
2638 	    default:
2639 	      ARMul_UndefInstr (state, instr);
2640 	      return ARMul_DONE;
2641 	    }
2642 
2643 	  r |= (s & 0xffffffff) << (i * 32);
2644 	  SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
2645 	  SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
2646 	}
2647       break;
2648 
2649     default:
2650       ARMul_UndefInstr (state, instr);
2651       return ARMul_DONE;
2652     }
2653 
2654   wC [wCASF] = psr;
2655   wR [BITS (12, 15)] = r;
2656   SET_wCSSFvec (satrv);
2657   wC [wCon] |= (WCON_CUP | WCON_MUP);
2658 
2659   return ARMul_DONE;
2660 }
2661 
2662 static int
2663 WROR (ARMul_State * state, ARMword instr)
2664 {
2665   ARMdword r = 0;
2666   ARMdword s;
2667   ARMword  psr = 0;
2668   int      i;
2669   int      shift;
2670 
2671   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2672     return ARMul_CANT;
2673 
2674 #ifdef DEBUG
2675   fprintf (stderr, "wror\n");
2676 #endif
2677 
2678   DECODE_G_BIT (state, instr, shift);
2679 
2680   switch (BITS (22, 23))
2681     {
2682     case Hqual:
2683       shift &= 0xf;
2684       for (i = 0; i < 4; i++)
2685 	{
2686 	  s = ((wRHALF (BITS (16, 19), i) & 0xffff) << (16 - shift))
2687 	    | ((wRHALF (BITS (16, 19), i) & 0xffff) >> shift);
2688 	  r |= (s & 0xffff) << (i * 16);
2689 	  SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
2690 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
2691 	}
2692       break;
2693 
2694     case Wqual:
2695       shift &= 0x1f;
2696       for (i = 0; i < 2; i++)
2697 	{
2698 	  s = ((wRWORD (BITS (16, 19), i) & 0xffffffff) << (32 - shift))
2699 	    | ((wRWORD (BITS (16, 19), i) & 0xffffffff) >> shift);
2700 	  r |= (s & 0xffffffff) << (i * 32);
2701 	  SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
2702 	  SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
2703 	}
2704       break;
2705 
2706     case Dqual:
2707       shift &= 0x3f;
2708       r = (wR [BITS (16, 19)] >> shift)
2709 	| (wR [BITS (16, 19)] << (64 - shift));
2710 
2711       SIMD64_SET (psr, NBIT64 (r), SIMD_NBIT);
2712       SIMD64_SET (psr, ZBIT64 (r), SIMD_ZBIT);
2713       break;
2714 
2715     default:
2716       ARMul_UndefInstr (state, instr);
2717       return ARMul_DONE;
2718     }
2719 
2720   wC [wCASF] = psr;
2721   wR [BITS (12, 15)] = r;
2722   wC [wCon] |= (WCON_CUP | WCON_MUP);
2723 
2724   return ARMul_DONE;
2725 }
2726 
2727 static int
2728 WSAD (ARMword instr)
2729 {
2730   ARMdword r;
2731   int      s;
2732   int      i;
2733 
2734   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2735     return ARMul_CANT;
2736 
2737 #ifdef DEBUG
2738   fprintf (stderr, "wsad\n");
2739 #endif
2740 
2741   /* Z bit.  */
2742   r = BIT (20) ? 0 : (wR [BITS (12, 15)] & 0xffffffff);
2743 
2744   if (BIT (22))
2745     /* Half.  */
2746     for (i = 0; i < 4; i++)
2747       {
2748 	s = (wRHALF (BITS (16, 19), i) - wRHALF (BITS (0, 3), i));
2749 	r += abs (s);
2750       }
2751   else
2752     /* Byte.  */
2753     for (i = 0; i < 8; i++)
2754       {
2755 	s = (wRBYTE (BITS (16, 19), i) - wRBYTE (BITS (0, 3), i));
2756 	r += abs (s);
2757       }
2758 
2759   wR [BITS (12, 15)] = r;
2760   wC [wCon] |= WCON_MUP;
2761 
2762   return ARMul_DONE;
2763 }
2764 
2765 static int
2766 WSHUFH (ARMword instr)
2767 {
2768   ARMdword r = 0;
2769   ARMword  psr = 0;
2770   ARMdword s;
2771   int      i;
2772   int      imm8;
2773 
2774   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2775     return ARMul_CANT;
2776 
2777 #ifdef DEBUG
2778   fprintf (stderr, "wshufh\n");
2779 #endif
2780 
2781   imm8 = (BITS (20, 23) << 4) | BITS (0, 3);
2782 
2783   for (i = 0; i < 4; i++)
2784     {
2785       s = wRHALF (BITS (16, 19), ((imm8 >> (i * 2) & 3)) & 0xff);
2786       r |= (s & 0xffff) << (i * 16);
2787       SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
2788       SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
2789     }
2790 
2791   wC [wCASF] = psr;
2792   wR [BITS (12, 15)] = r;
2793   wC [wCon] |= (WCON_CUP | WCON_MUP);
2794 
2795   return ARMul_DONE;
2796 }
2797 
2798 static int
2799 WSLL (ARMul_State * state, ARMword instr)
2800 {
2801   ARMdword r = 0;
2802   ARMdword s;
2803   ARMword  psr = 0;
2804   int      i;
2805   unsigned shift;
2806 
2807   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2808     return ARMul_CANT;
2809 
2810 #ifdef DEBUG
2811   fprintf (stderr, "wsll\n");
2812 #endif
2813 
2814   DECODE_G_BIT (state, instr, shift);
2815 
2816   switch (BITS (22, 23))
2817     {
2818     case Hqual:
2819       for (i = 0; i < 4; i++)
2820 	{
2821 	  if (shift > 15)
2822 	    s = 0;
2823 	  else
2824 	    s = ((wRHALF (BITS (16, 19), i) & 0xffff) << shift);
2825 	  r |= (s & 0xffff) << (i * 16);
2826 	  SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
2827 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
2828 	}
2829       break;
2830 
2831     case Wqual:
2832       for (i = 0; i < 2; i++)
2833 	{
2834 	  if (shift > 31)
2835 	    s = 0;
2836 	  else
2837 	    s = ((wRWORD (BITS (16, 19), i) & 0xffffffff) << shift);
2838 	  r |= (s & 0xffffffff) << (i * 32);
2839 	  SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
2840 	  SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
2841 	}
2842       break;
2843 
2844     case Dqual:
2845       if (shift > 63)
2846 	r = 0;
2847       else
2848 	r = ((wR[BITS (16, 19)] & 0xffffffffffffffffULL) << shift);
2849 
2850       SIMD64_SET (psr, NBIT64 (r), SIMD_NBIT);
2851       SIMD64_SET (psr, ZBIT64 (r), SIMD_ZBIT);
2852       break;
2853 
2854     default:
2855       ARMul_UndefInstr (state, instr);
2856       return ARMul_DONE;
2857     }
2858 
2859   wC [wCASF] = psr;
2860   wR [BITS (12, 15)] = r;
2861   wC [wCon] |= (WCON_CUP | WCON_MUP);
2862 
2863   return ARMul_DONE;
2864 }
2865 
2866 static int
2867 WSRA (ARMul_State * state, ARMword instr)
2868 {
2869   ARMdword     r = 0;
2870   ARMdword     s;
2871   ARMword      psr = 0;
2872   int          i;
2873   unsigned     shift;
2874   signed long  t;
2875 
2876   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2877     return ARMul_CANT;
2878 
2879 #ifdef DEBUG
2880   fprintf (stderr, "wsra\n");
2881 #endif
2882 
2883   DECODE_G_BIT (state, instr, shift);
2884 
2885   switch (BITS (22, 23))
2886     {
2887     case Hqual:
2888       for (i = 0; i < 4; i++)
2889 	{
2890 	  if (shift > 15)
2891 	    t = (wRHALF (BITS (16, 19), i) & 0x8000) ? 0xffff : 0;
2892 	  else
2893 	    {
2894 	      t = wRHALF (BITS (16, 19), i);
2895 	      t = EXTEND16 (t);
2896 	      t >>= shift;
2897 	    }
2898 
2899 	  s = t;
2900 	  r |= (s & 0xffff) << (i * 16);
2901 	  SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
2902 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
2903 	}
2904       break;
2905 
2906     case Wqual:
2907       for (i = 0; i < 2; i++)
2908 	{
2909 	  if (shift > 31)
2910 	    t = (wRWORD (BITS (16, 19), i) & 0x80000000) ? 0xffffffff : 0;
2911 	  else
2912 	    {
2913 	      t = EXTEND32 (wRWORD (BITS (16, 19), i));
2914 	      t >>= shift;
2915 	    }
2916 	  s = t;
2917 	  r |= (s & 0xffffffff) << (i * 32);
2918 	  SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
2919 	  SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
2920 	}
2921       break;
2922 
2923     case Dqual:
2924       if (shift > 63)
2925 	r = (wR [BITS (16, 19)] & 0x8000000000000000ULL) ? 0xffffffffffffffffULL : 0;
2926       else
2927 	r = ((signed long long) (wR[BITS (16, 19)] & 0xffffffffffffffffULL) >> shift);
2928       SIMD64_SET (psr, NBIT64 (r), SIMD_NBIT);
2929       SIMD64_SET (psr, ZBIT64 (r), SIMD_ZBIT);
2930       break;
2931 
2932     default:
2933       ARMul_UndefInstr (state, instr);
2934       return ARMul_DONE;
2935     }
2936 
2937   wC [wCASF] = psr;
2938   wR [BITS (12, 15)] = r;
2939   wC [wCon] |= (WCON_CUP | WCON_MUP);
2940 
2941   return ARMul_DONE;
2942 }
2943 
2944 static int
2945 WSRL (ARMul_State * state, ARMword instr)
2946 {
2947   ARMdword     r = 0;
2948   ARMdword     s;
2949   ARMword      psr = 0;
2950   int          i;
2951   unsigned int shift;
2952 
2953   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
2954     return ARMul_CANT;
2955 
2956 #ifdef DEBUG
2957   fprintf (stderr, "wsrl\n");
2958 #endif
2959 
2960   DECODE_G_BIT (state, instr, shift);
2961 
2962   switch (BITS (22, 23))
2963     {
2964     case Hqual:
2965       for (i = 0; i < 4; i++)
2966 	{
2967 	  if (shift > 15)
2968 	    s = 0;
2969 	  else
2970 	    s = ((unsigned) (wRHALF (BITS (16, 19), i) & 0xffff) >> shift);
2971 
2972 	  r |= (s & 0xffff) << (i * 16);
2973 	  SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
2974 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
2975 	}
2976       break;
2977 
2978     case Wqual:
2979       for (i = 0; i < 2; i++)
2980 	{
2981 	  if (shift > 31)
2982 	    s = 0;
2983 	  else
2984 	    s = ((unsigned long) (wRWORD (BITS (16, 19), i) & 0xffffffff) >> shift);
2985 
2986 	  r |= (s & 0xffffffff) << (i * 32);
2987 	  SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
2988 	  SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
2989 	}
2990       break;
2991 
2992     case Dqual:
2993       if (shift > 63)
2994 	r = 0;
2995       else
2996 	r = (wR [BITS (16, 19)] & 0xffffffffffffffffULL) >> shift;
2997 
2998       SIMD64_SET (psr, NBIT64 (r), SIMD_NBIT);
2999       SIMD64_SET (psr, ZBIT64 (r), SIMD_ZBIT);
3000       break;
3001 
3002     default:
3003       ARMul_UndefInstr (state, instr);
3004       return ARMul_DONE;
3005     }
3006 
3007   wC [wCASF] = psr;
3008   wR [BITS (12, 15)] = r;
3009   wC [wCon] |= (WCON_CUP | WCON_MUP);
3010 
3011   return ARMul_DONE;
3012 }
3013 
3014 static int
3015 WSTR (ARMul_State * state, ARMword instr)
3016 {
3017   ARMword address;
3018   int failed;
3019 
3020 
3021   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
3022     return ARMul_CANT;
3023 
3024 #ifdef DEBUG
3025   fprintf (stderr, "wstr\n");
3026 #endif
3027 
3028   address = Compute_Iwmmxt_Address (state, instr, & failed);
3029   if (failed)
3030     return ARMul_CANT;
3031 
3032   if (BITS (28, 31) == 0xf)
3033     {
3034       /* WSTRW wCx */
3035       Iwmmxt_Store_Word (state, address, wC [BITS (12, 15)]);
3036     }
3037   else if (BIT (8) == 0)
3038     {
3039       if (BIT (22) == 0)
3040 	/* WSTRB */
3041 	Iwmmxt_Store_Byte (state, address, wR [BITS (12, 15)]);
3042       else
3043 	/* WSTRH */
3044 	Iwmmxt_Store_Half_Word (state, address, wR [BITS (12, 15)]);
3045     }
3046   else
3047     {
3048       if (BIT (22) == 0)
3049 	/* WSTRW wRd */
3050 	Iwmmxt_Store_Word (state, address, wR [BITS (12, 15)]);
3051       else
3052 	/* WSTRD */
3053 	Iwmmxt_Store_Double_Word (state, address, wR [BITS (12, 15)]);
3054     }
3055 
3056   return ARMul_DONE;
3057 }
3058 
3059 static int
3060 WSUB (ARMul_State * state, ARMword instr)
3061 {
3062   ARMdword r = 0;
3063   ARMword  psr = 0;
3064   ARMdword x;
3065   ARMdword s;
3066   int      i;
3067   int      carry;
3068   int      overflow;
3069   int      satrv[8];
3070 
3071   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
3072     return ARMul_CANT;
3073 
3074 #ifdef DEBUG
3075   fprintf (stderr, "wsub\n");
3076 #endif
3077 
3078 /* Subtract two numbers using the specified function,
3079    leaving setting the carry bit as required.  */
3080 #define SUBx(x, y, m, f) \
3081    (*f) (wRBITS (BITS (16, 19), (x), (y)) & (m), \
3082          wRBITS (BITS ( 0,  3), (x), (y)) & (m), & carry, & overflow)
3083 
3084   switch (BITS (22, 23))
3085     {
3086     case Bqual:
3087       for (i = 0; i < 8; i++)
3088         {
3089 	  switch (BITS (20, 21))
3090 	    {
3091 	    case NoSaturation:
3092 	      s = SUBx ((i * 8), (i * 8) + 7, 0xff, SubS8);
3093 	      satrv [BITIDX8 (i)] = 0;
3094 	      r |= (s & 0xff) << (i * 8);
3095 	      SIMD8_SET (psr, NBIT8 (s), SIMD_NBIT, i);
3096 	      SIMD8_SET (psr, ZBIT8 (s), SIMD_ZBIT, i);
3097 	      SIMD8_SET (psr, carry, SIMD_CBIT, i);
3098 	      SIMD8_SET (psr, overflow, SIMD_VBIT, i);
3099 	      break;
3100 
3101 	    case UnsignedSaturation:
3102 	      s = SUBx ((i * 8), (i * 8) + 7, 0xff, SubU8);
3103 	      x = IwmmxtSaturateU8 (s, satrv + BITIDX8 (i));
3104 	      r |= (x & 0xff) << (i * 8);
3105 	      SIMD8_SET (psr, NBIT8 (x), SIMD_NBIT, i);
3106 	      SIMD8_SET (psr, ZBIT8 (x), SIMD_ZBIT, i);
3107 	      if (! satrv [BITIDX8 (i)])
3108 		{
3109 		  SIMD8_SET (psr, carry,     SIMD_CBIT, i);
3110 		  SIMD8_SET (psr, overflow, SIMD_VBIT, i);
3111 		}
3112 	      break;
3113 
3114 	    case SignedSaturation:
3115 	      s = SUBx ((i * 8), (i * 8) + 7, 0xff, SubS8);
3116 	      x = IwmmxtSaturateS8 (s, satrv + BITIDX8 (i));
3117 	      r |= (x & 0xff) << (i * 8);
3118 	      SIMD8_SET (psr, NBIT8 (x), SIMD_NBIT, i);
3119 	      SIMD8_SET (psr, ZBIT8 (x), SIMD_ZBIT, i);
3120 	      if (! satrv [BITIDX8 (i)])
3121 		{
3122 		  SIMD8_SET (psr, carry,     SIMD_CBIT, i);
3123 		  SIMD8_SET (psr, overflow, SIMD_VBIT, i);
3124 		}
3125 	      break;
3126 
3127 	    default:
3128 	      ARMul_UndefInstr (state, instr);
3129 	      return ARMul_DONE;
3130 	    }
3131 	}
3132       break;
3133 
3134     case Hqual:
3135       satrv[0] = satrv[2] = satrv[4] = satrv[6] = 0;
3136 
3137       for (i = 0; i < 4; i++)
3138 	{
3139 	  switch (BITS (20, 21))
3140 	    {
3141 	    case NoSaturation:
3142 	      s = SUBx ((i * 16), (i * 16) + 15, 0xffff, SubU16);
3143 	      satrv [BITIDX16 (i)] = 0;
3144 	      r |= (s & 0xffff) << (i * 16);
3145 	      SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
3146 	      SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
3147 	      SIMD16_SET (psr, carry,      SIMD_CBIT, i);
3148 	      SIMD16_SET (psr, overflow,   SIMD_VBIT, i);
3149 	      break;
3150 
3151 	    case UnsignedSaturation:
3152 	      s = SUBx ((i * 16), (i * 16) + 15, 0xffff, SubU16);
3153 	      x = IwmmxtSaturateU16 (s, satrv + BITIDX16 (i));
3154 	      r |= (x & 0xffff) << (i * 16);
3155 	      SIMD16_SET (psr, NBIT16 (x & 0xffff), SIMD_NBIT, i);
3156 	      SIMD16_SET (psr, ZBIT16 (x), SIMD_ZBIT, i);
3157 	      if (! satrv [BITIDX16 (i)])
3158 		{
3159 		  SIMD16_SET (psr, carry,    SIMD_CBIT, i);
3160 		  SIMD16_SET (psr, overflow, SIMD_VBIT, i);
3161 		}
3162 	      break;
3163 
3164 	    case SignedSaturation:
3165 	      s = SUBx ((i * 16), (i * 16) + 15, 0xffff, SubS16);
3166 	      x = IwmmxtSaturateS16 (s, satrv + BITIDX16 (i));
3167 	      r |= (x & 0xffff) << (i * 16);
3168 	      SIMD16_SET (psr, NBIT16 (x), SIMD_NBIT, i);
3169 	      SIMD16_SET (psr, ZBIT16 (x), SIMD_ZBIT, i);
3170 	      if (! satrv [BITIDX16 (i)])
3171 		{
3172 		  SIMD16_SET (psr, carry,    SIMD_CBIT, i);
3173 		  SIMD16_SET (psr, overflow, SIMD_VBIT, i);
3174 		}
3175 	      break;
3176 
3177 	    default:
3178 	      ARMul_UndefInstr (state, instr);
3179 	      return ARMul_DONE;
3180 	    }
3181 	}
3182       break;
3183 
3184     case Wqual:
3185       satrv[0] = satrv[1] = satrv[2] = satrv[4] = satrv[5] = satrv[6] = 0;
3186 
3187       for (i = 0; i < 2; i++)
3188 	{
3189 	  switch (BITS (20, 21))
3190 	    {
3191 	    case NoSaturation:
3192 	      s = SUBx ((i * 32), (i * 32) + 31, 0xffffffff, SubU32);
3193 	      satrv[BITIDX32 (i)] = 0;
3194 	      r |= (s & 0xffffffff) << (i * 32);
3195 	      SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
3196 	      SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
3197 	      SIMD32_SET (psr, carry,      SIMD_CBIT, i);
3198 	      SIMD32_SET (psr, overflow,   SIMD_VBIT, i);
3199 	      break;
3200 
3201 	    case UnsignedSaturation:
3202 	      s = SUBx ((i * 32), (i * 32) + 31, 0xffffffff, SubU32);
3203 	      x = IwmmxtSaturateU32 (s, satrv + BITIDX32 (i));
3204 	      r |= (x & 0xffffffff) << (i * 32);
3205 	      SIMD32_SET (psr, NBIT32 (x), SIMD_NBIT, i);
3206 	      SIMD32_SET (psr, ZBIT32 (x), SIMD_ZBIT, i);
3207 	      if (! satrv [BITIDX32 (i)])
3208 		{
3209 		  SIMD32_SET (psr, carry,    SIMD_CBIT, i);
3210 		  SIMD32_SET (psr, overflow, SIMD_VBIT, i);
3211 		}
3212 	      break;
3213 
3214 	    case SignedSaturation:
3215 	      s = SUBx ((i * 32), (i * 32) + 31, 0xffffffff, SubS32);
3216 	      x = IwmmxtSaturateS32 (s, satrv + BITIDX32 (i));
3217 	      r |= (x & 0xffffffff) << (i * 32);
3218 	      SIMD32_SET (psr, NBIT32 (x), SIMD_NBIT, i);
3219 	      SIMD32_SET (psr, ZBIT32 (x), SIMD_ZBIT, i);
3220 	      if (! satrv [BITIDX32 (i)])
3221 		{
3222 		  SIMD32_SET (psr, carry,    SIMD_CBIT, i);
3223 		  SIMD32_SET (psr, overflow, SIMD_VBIT, i);
3224 		}
3225 	      break;
3226 
3227 	    default:
3228 	      ARMul_UndefInstr (state, instr);
3229 	      return ARMul_DONE;
3230 	    }
3231 	}
3232       break;
3233 
3234     default:
3235       ARMul_UndefInstr (state, instr);
3236       return ARMul_DONE;
3237     }
3238 
3239   wR [BITS (12, 15)] = r;
3240   wC [wCASF] = psr;
3241   SET_wCSSFvec (satrv);
3242   wC [wCon] |= (WCON_CUP | WCON_MUP);
3243 
3244 #undef SUBx
3245 
3246   return ARMul_DONE;
3247 }
3248 
3249 static int
3250 WUNPCKEH (ARMul_State * state, ARMword instr)
3251 {
3252   ARMdword r = 0;
3253   ARMword  psr = 0;
3254   ARMdword s;
3255   int      i;
3256 
3257   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
3258     return ARMul_CANT;
3259 
3260 #ifdef DEBUG
3261   fprintf (stderr, "wunpckeh\n");
3262 #endif
3263 
3264   switch (BITS (22, 23))
3265     {
3266     case Bqual:
3267       for (i = 0; i < 4; i++)
3268 	{
3269 	  s = wRBYTE (BITS (16, 19), i + 4);
3270 
3271 	  if (BIT (21) && NBIT8 (s))
3272 	    s |= 0xff00;
3273 
3274 	  r |= (s & 0xffff) << (i * 16);
3275 	  SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
3276 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
3277 	}
3278       break;
3279 
3280     case Hqual:
3281       for (i = 0; i < 2; i++)
3282 	{
3283 	  s = wRHALF (BITS (16, 19), i + 2);
3284 
3285 	  if (BIT (21) && NBIT16 (s))
3286 	    s |= 0xffff0000;
3287 
3288 	  r |= (s & 0xffffffff) << (i * 32);
3289 	  SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
3290 	  SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
3291 	}
3292       break;
3293 
3294     case Wqual:
3295       r = wRWORD (BITS (16, 19), 1);
3296 
3297       if (BIT (21) && NBIT32 (r))
3298 	r |= 0xffffffff00000000ULL;
3299 
3300       SIMD64_SET (psr, NBIT64 (r), SIMD_NBIT);
3301       SIMD64_SET (psr, ZBIT64 (r), SIMD_ZBIT);
3302       break;
3303 
3304     default:
3305       ARMul_UndefInstr (state, instr);
3306       return ARMul_DONE;
3307     }
3308 
3309   wC [wCASF] = psr;
3310   wR [BITS (12, 15)] = r;
3311   wC [wCon] |= (WCON_CUP | WCON_MUP);
3312 
3313   return ARMul_DONE;
3314 }
3315 
3316 static int
3317 WUNPCKEL (ARMul_State * state, ARMword instr)
3318 {
3319   ARMdword r = 0;
3320   ARMword  psr = 0;
3321   ARMdword s;
3322   int      i;
3323 
3324   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
3325     return ARMul_CANT;
3326 
3327 #ifdef DEBUG
3328   fprintf (stderr, "wunpckel\n");
3329 #endif
3330 
3331   switch (BITS (22, 23))
3332     {
3333     case Bqual:
3334       for (i = 0; i < 4; i++)
3335 	{
3336 	  s = wRBYTE (BITS (16, 19), i);
3337 
3338 	  if (BIT (21) && NBIT8 (s))
3339 	    s |= 0xff00;
3340 
3341 	  r |= (s & 0xffff) << (i * 16);
3342 	  SIMD16_SET (psr, NBIT16 (s), SIMD_NBIT, i);
3343 	  SIMD16_SET (psr, ZBIT16 (s), SIMD_ZBIT, i);
3344 	}
3345       break;
3346 
3347     case Hqual:
3348       for (i = 0; i < 2; i++)
3349 	{
3350 	  s = wRHALF (BITS (16, 19), i);
3351 
3352 	  if (BIT (21) && NBIT16 (s))
3353 	    s |= 0xffff0000;
3354 
3355 	  r |= (s & 0xffffffff) << (i * 32);
3356 	  SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, i);
3357 	  SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, i);
3358 	}
3359       break;
3360 
3361     case Wqual:
3362       r = wRWORD (BITS (16, 19), 0);
3363 
3364       if (BIT (21) && NBIT32 (r))
3365 	r |= 0xffffffff00000000ULL;
3366 
3367       SIMD64_SET (psr, NBIT64 (r), SIMD_NBIT);
3368       SIMD64_SET (psr, ZBIT64 (r), SIMD_ZBIT);
3369       break;
3370 
3371     default:
3372       ARMul_UndefInstr (state, instr);
3373       return ARMul_DONE;
3374     }
3375 
3376   wC [wCASF] = psr;
3377   wR [BITS (12, 15)] = r;
3378   wC [wCon] |= (WCON_CUP | WCON_MUP);
3379 
3380   return ARMul_DONE;
3381 }
3382 
3383 static int
3384 WUNPCKIH (ARMul_State * state, ARMword instr)
3385 {
3386   ARMword  a, b;
3387   ARMdword r = 0;
3388   ARMword  psr = 0;
3389   ARMdword s;
3390   int      i;
3391 
3392   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
3393     return ARMul_CANT;
3394 
3395 #ifdef DEBUG
3396   fprintf (stderr, "wunpckih\n");
3397 #endif
3398 
3399   switch (BITS (22, 23))
3400     {
3401     case Bqual:
3402       for (i = 0; i < 4; i++)
3403 	{
3404 	  a = wRBYTE (BITS (16, 19), i + 4);
3405 	  b = wRBYTE (BITS ( 0,  3), i + 4);
3406 	  s = a | (b << 8);
3407 	  r |= (s & 0xffff) << (i * 16);
3408 	  SIMD8_SET (psr, NBIT8 (a), SIMD_NBIT, i * 2);
3409 	  SIMD8_SET (psr, ZBIT8 (a), SIMD_ZBIT, i * 2);
3410 	  SIMD8_SET (psr, NBIT8 (b), SIMD_NBIT, (i * 2) + 1);
3411 	  SIMD8_SET (psr, ZBIT8 (b), SIMD_ZBIT, (i * 2) + 1);
3412 	}
3413       break;
3414 
3415     case Hqual:
3416       for (i = 0; i < 2; i++)
3417 	{
3418 	  a = wRHALF (BITS (16, 19), i + 2);
3419 	  b = wRHALF (BITS ( 0,  3), i + 2);
3420 	  s = a | (b << 16);
3421 	  r |= (s & 0xffffffff) << (i * 32);
3422 	  SIMD16_SET (psr, NBIT16 (a), SIMD_NBIT, (i * 2));
3423 	  SIMD16_SET (psr, ZBIT16 (a), SIMD_ZBIT, (i * 2));
3424 	  SIMD16_SET (psr, NBIT16 (b), SIMD_NBIT, (i * 2) + 1);
3425 	  SIMD16_SET (psr, ZBIT16 (b), SIMD_ZBIT, (i * 2) + 1);
3426 	}
3427       break;
3428 
3429     case Wqual:
3430       a = wRWORD (BITS (16, 19), 1);
3431       s = wRWORD (BITS ( 0,  3), 1);
3432       r = a | (s << 32);
3433 
3434       SIMD32_SET (psr, NBIT32 (a), SIMD_NBIT, 0);
3435       SIMD32_SET (psr, ZBIT32 (a), SIMD_ZBIT, 0);
3436       SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, 1);
3437       SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, 1);
3438       break;
3439 
3440     default:
3441       ARMul_UndefInstr (state, instr);
3442       return ARMul_DONE;
3443     }
3444 
3445   wC [wCASF] = psr;
3446   wR [BITS (12, 15)] = r;
3447   wC [wCon] |= (WCON_CUP | WCON_MUP);
3448 
3449   return ARMul_DONE;
3450 }
3451 
3452 static int
3453 WUNPCKIL (ARMul_State * state, ARMword instr)
3454 {
3455   ARMword  a, b;
3456   ARMdword r = 0;
3457   ARMword  psr = 0;
3458   ARMdword s;
3459   int      i;
3460 
3461   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
3462     return ARMul_CANT;
3463 
3464 #ifdef DEBUG
3465   fprintf (stderr, "wunpckil\n");
3466 #endif
3467 
3468   switch (BITS (22, 23))
3469     {
3470     case Bqual:
3471       for (i = 0; i < 4; i++)
3472 	{
3473 	  a = wRBYTE (BITS (16, 19), i);
3474 	  b = wRBYTE (BITS ( 0,  3), i);
3475 	  s = a | (b << 8);
3476 	  r |= (s & 0xffff) << (i * 16);
3477 	  SIMD8_SET (psr, NBIT8 (a), SIMD_NBIT, i * 2);
3478 	  SIMD8_SET (psr, ZBIT8 (a), SIMD_ZBIT, i * 2);
3479 	  SIMD8_SET (psr, NBIT8 (b), SIMD_NBIT, (i * 2) + 1);
3480 	  SIMD8_SET (psr, ZBIT8 (b), SIMD_ZBIT, (i * 2) + 1);
3481 	}
3482       break;
3483 
3484     case Hqual:
3485       for (i = 0; i < 2; i++)
3486 	{
3487 	  a = wRHALF (BITS (16, 19), i);
3488 	  b = wRHALF (BITS ( 0,  3), i);
3489 	  s = a | (b << 16);
3490 	  r |= (s & 0xffffffff) << (i * 32);
3491 	  SIMD16_SET (psr, NBIT16 (a), SIMD_NBIT, (i * 2));
3492 	  SIMD16_SET (psr, ZBIT16 (a), SIMD_ZBIT, (i * 2));
3493 	  SIMD16_SET (psr, NBIT16 (b), SIMD_NBIT, (i * 2) + 1);
3494 	  SIMD16_SET (psr, ZBIT16 (b), SIMD_ZBIT, (i * 2) + 1);
3495 	}
3496       break;
3497 
3498     case Wqual:
3499       a = wRWORD (BITS (16, 19), 0);
3500       s = wRWORD (BITS ( 0,  3), 0);
3501       r = a | (s << 32);
3502 
3503       SIMD32_SET (psr, NBIT32 (a), SIMD_NBIT, 0);
3504       SIMD32_SET (psr, ZBIT32 (a), SIMD_ZBIT, 0);
3505       SIMD32_SET (psr, NBIT32 (s), SIMD_NBIT, 1);
3506       SIMD32_SET (psr, ZBIT32 (s), SIMD_ZBIT, 1);
3507       break;
3508 
3509     default:
3510       ARMul_UndefInstr (state, instr);
3511       return ARMul_DONE;
3512     }
3513 
3514   wC [wCASF] = psr;
3515   wR [BITS (12, 15)] = r;
3516   wC [wCon] |= (WCON_CUP | WCON_MUP);
3517 
3518   return ARMul_DONE;
3519 }
3520 
3521 static int
3522 WXOR (ARMword instr)
3523 {
3524   ARMword psr = 0;
3525   ARMdword result;
3526 
3527   if ((read_cp15_reg (15, 0, 1) & 3) != 3)
3528     return ARMul_CANT;
3529 
3530 #ifdef DEBUG
3531   fprintf (stderr, "wxor\n");
3532 #endif
3533 
3534   result = wR [BITS (16, 19)] ^ wR [BITS (0, 3)];
3535   wR [BITS (12, 15)] = result;
3536 
3537   SIMD64_SET (psr, (result == 0), SIMD_ZBIT);
3538   SIMD64_SET (psr, (result & (1ULL << 63)), SIMD_NBIT);
3539 
3540   wC [wCASF] = psr;
3541   wC [wCon] |= (WCON_CUP | WCON_MUP);
3542 
3543   return ARMul_DONE;
3544 }
3545 
3546 /* This switch table is moved to a separate function in order
3547    to work around a compiler bug in the host compiler...  */
3548 
3549 static int
3550 Process_Instruction (ARMul_State * state, ARMword instr)
3551 {
3552   int status = ARMul_BUSY;
3553 
3554   switch ((BITS (20, 23) << 8) | BITS (4, 11))
3555     {
3556     case 0x000: status = WOR (instr); break;
3557     case 0x011: status = TMCR (state, instr); break;
3558     case 0x100: status = WXOR (instr); break;
3559     case 0x111: status = TMRC (state, instr); break;
3560     case 0x300: status = WANDN (instr); break;
3561     case 0x200: status = WAND (instr); break;
3562 
3563     case 0x810: case 0xa10:
3564       status = WMADD (instr); break;
3565 
3566     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:
3567       status = WUNPCKIL (state, instr); break;
3568     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:
3569       status = WUNPCKIH (state, instr); break;
3570     case 0x012: case 0x112: case 0x412: case 0x512:
3571       status = WSAD (instr); break;
3572     case 0x010: case 0x110: case 0x210: case 0x310:
3573       status = WMUL (instr); break;
3574     case 0x410: case 0x510: case 0x610: case 0x710:
3575       status = WMAC (instr); break;
3576     case 0x006: case 0x406: case 0x806: case 0xc06:
3577       status = WCMPEQ (state, instr); break;
3578     case 0x800: case 0x900: case 0xc00: case 0xd00:
3579       status = WAVG2 (instr); break;
3580     case 0x802: case 0x902: case 0xa02: case 0xb02:
3581       status = WALIGNR (state, instr); break;
3582     case 0x601: case 0x605: case 0x609: case 0x60d:
3583       status = TINSR (state, instr); break;
3584     case 0x107: case 0x507: case 0x907: case 0xd07:
3585       status = TEXTRM (state, instr); break;
3586     case 0x117: case 0x517: case 0x917: case 0xd17:
3587       status = TEXTRC (state, instr); break;
3588     case 0x401: case 0x405: case 0x409: case 0x40d:
3589       status = TBCST (state, instr); break;
3590     case 0x113: case 0x513: case 0x913: case 0xd13:
3591       status = TANDC (state, instr); break;
3592     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:
3593       status = WACC (state, instr); break;
3594     case 0x115: case 0x515: case 0x915: case 0xd15:
3595       status = TORC (state, instr); break;
3596     case 0x103: case 0x503: case 0x903: case 0xd03:
3597       status = TMOVMSK (state, instr); break;
3598     case 0x106: case 0x306: case 0x506: case 0x706:
3599     case 0x906: case 0xb06: case 0xd06: case 0xf06:
3600       status = WCMPGT (state, instr); break;
3601     case 0x00e: case 0x20e: case 0x40e: case 0x60e:
3602     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
3603       status = WUNPCKEL (state, instr); break;
3604     case 0x00c: case 0x20c: case 0x40c: case 0x60c:
3605     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
3606       status = WUNPCKEH (state, instr); break;
3607     case 0x204: case 0x604: case 0xa04: case 0xe04:
3608     case 0x214: case 0x614: case 0xa14: case 0xe14:
3609       status = WSRL (state, instr); break;
3610     case 0x004: case 0x404: case 0x804: case 0xc04:
3611     case 0x014: case 0x414: case 0x814: case 0xc14:
3612       status = WSRA (state, instr); break;
3613     case 0x104: case 0x504: case 0x904: case 0xd04:
3614     case 0x114: case 0x514: case 0x914: case 0xd14:
3615       status = WSLL (state, instr); break;
3616     case 0x304: case 0x704: case 0xb04: case 0xf04:
3617     case 0x314: case 0x714: case 0xb14: case 0xf14:
3618       status = WROR (state, instr); break;
3619     case 0x116: case 0x316: case 0x516: case 0x716:
3620     case 0x916: case 0xb16: case 0xd16: case 0xf16:
3621       status = WMIN (state, instr); break;
3622     case 0x016: case 0x216: case 0x416: case 0x616:
3623     case 0x816: case 0xa16: case 0xc16: case 0xe16:
3624       status = WMAX (state, instr); break;
3625     case 0x002: case 0x102: case 0x202: case 0x302:
3626     case 0x402: case 0x502: case 0x602: case 0x702:
3627       status = WALIGNI (instr); break;
3628     case 0x01a: case 0x11a: case 0x21a: case 0x31a:
3629     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
3630     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
3631     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
3632       status = WSUB (state, instr); break;
3633     case 0x01e: case 0x11e: case 0x21e: case 0x31e:
3634     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
3635     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
3636     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
3637       status = WSHUFH (instr); break;
3638     case 0x018: case 0x118: case 0x218: case 0x318:
3639     case 0x418: case 0x518: case 0x618: case 0x718:
3640     case 0x818: case 0x918: case 0xa18: case 0xb18:
3641     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
3642       status = WADD (state, instr); break;
3643     case 0x008: case 0x108: case 0x208: case 0x308:
3644     case 0x408: case 0x508: case 0x608: case 0x708:
3645     case 0x808: case 0x908: case 0xa08: case 0xb08:
3646     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
3647       status = WPACK (state, instr); break;
3648     case 0x201: case 0x203: case 0x205: case 0x207:
3649     case 0x209: case 0x20b: case 0x20d: case 0x20f:
3650     case 0x211: case 0x213: case 0x215: case 0x217:
3651     case 0x219: case 0x21b: case 0x21d: case 0x21f:
3652       switch (BITS (16, 19))
3653 	{
3654 	case 0x0: status = TMIA (state, instr); break;
3655 	case 0x8: status = TMIAPH (state, instr); break;
3656 	case 0xc:
3657 	case 0xd:
3658 	case 0xe:
3659 	case 0xf: status = TMIAxy (state, instr); break;
3660 	default: break;
3661 	}
3662       break;
3663     default:
3664       break;
3665     }
3666   return status;
3667 }
3668 
3669 /* Process a possibly Intel(r) Wireless MMX(tm) technology instruction.
3670    Return true if the instruction was handled.  */
3671 
3672 int
3673 ARMul_HandleIwmmxt (ARMul_State * state, ARMword instr)
3674 {
3675   int status = ARMul_BUSY;
3676 
3677   if (BITS (24, 27) == 0xe)
3678     {
3679       status = Process_Instruction (state, instr);
3680     }
3681   else if (BITS (25, 27) == 0x6)
3682     {
3683       if (BITS (4, 11) == 0x0 && BITS (20, 24) == 0x4)
3684 	status = TMCRR (state, instr);
3685       else if (BITS (9, 11) == 0x0)
3686 	{
3687 	  if (BIT (20) == 0x0)
3688 	    status = WSTR (state, instr);
3689 	  else if (BITS (20, 24) == 0x5)
3690 	    status = TMRRC (state, instr);
3691 	  else
3692 	    status = WLDR (state, instr);
3693 	}
3694     }
3695 
3696   if (status == ARMul_CANT)
3697     {
3698       /* If the instruction was a recognised but illegal,
3699 	 perform the abort here rather than returning false.
3700 	 If we return false then ARMul_MRC may be called which
3701 	 will still abort, but which also perform the register
3702 	 transfer...  */
3703       ARMul_Abort (state, ARMul_UndefinedInstrV);
3704       status = ARMul_DONE;
3705     }
3706 
3707   return status == ARMul_DONE;
3708 }
3709 
3710 int
3711 Fetch_Iwmmxt_Register (unsigned int regnum, unsigned char * memory)
3712 {
3713   if (regnum >= 16)
3714     {
3715       memcpy (memory, wC + (regnum - 16), sizeof wC [0]);
3716       return sizeof wC [0];
3717     }
3718   else
3719     {
3720       memcpy (memory, wR + regnum, sizeof wR [0]);
3721       return sizeof wR [0];
3722     }
3723 }
3724 
3725 int
3726 Store_Iwmmxt_Register (unsigned int regnum, const unsigned char * memory)
3727 {
3728   if (regnum >= 16)
3729     {
3730       memcpy (wC + (regnum - 16), memory, sizeof wC [0]);
3731       return sizeof wC [0];
3732     }
3733   else
3734     {
3735       memcpy (wR + regnum, memory, sizeof wR [0]);
3736       return sizeof wR [0];
3737     }
3738 }
3739