/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2017 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unallocated instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGILL);			\
    }									\
  while (0)

#define HALT_NYI							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unimplemented instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      if (! TRACE_ANY_P (cpu))						\
	sim_io_eprintf (CPU_STATE (cpu),				\
			"SIM Error: Unimplemented instruction: %#08x\n",\
			aarch64_get_instr (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGABRT);			\
    }									\
  while (0)

#define NYI_assert(HI, LO, EXPECTED)					\
  do									\
    {									\
      if (INSTR ((HI), (LO)) != (EXPECTED))				\
	HALT_NYI;							\
    }									\
  while (0)

/* Helper functions used by expandLogicalImmediate.  */

/* for i = 1, ... N result<i-1> = 1 other bits are zero  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1));
}

/* result<0> = val<N>  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
	{
	case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
	case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
	case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
	case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
	case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
	default: return 0;
	}
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
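/* For illustration: expand_logical_immediate (7, 0, 1) selects a 64 bit
   element (N == 1) and builds S+1 == 8 consecutive ones with no
   rotation, giving 0x00000000000000ff, while expand_logical_immediate
   (0, 0, 0) selects a 32 bit element and replicates a single one bit,
   giving 0x0000000100000001.  */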
/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
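/* For illustration, the logical immediate decoders later in this file
   can pick up the expanded value directly, roughly as

     uint64_t imm = LITable [INSTR (22, 10)];

   with a zero entry treated as an unallocated encoding, since
   expand_logical_immediate returns 0 for every combination it
   rejects.  */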
static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* secondary decode within top level groups  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] == 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}
/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */
/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words.
   Rt may not be SP.  */

/* 32 bit pc-relative load  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* sign extended 32 bit pc-relative load  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_s32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* float pc-relative load  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* double pc-relative load  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}
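/* For illustration: the decode passes the raw signed 19 bit word count
   to the pc-relative loads above, so a label 16 bytes beyond the
   instruction arrives here as offset == 4 and the access goes to
   aarch64_get_PC (cpu) + 16.  */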
/* long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is the element size in bits: 16,
   32, 64 or 128.  The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Unscaled no shift is applied.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
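/* For illustration, assuming ScaleShift32 == 2 and ScaleShift64 == 3
   in the decode headers, and that Unscaled is the zero enumerator:

     SCALE (3, 32)               == 12
     OPT_SCALE (5, 64, Scaled)   == 40
     OPT_SCALE (5, 64, Unscaled) == 5

     extend (0x80000000, UXTW)   == 0x0000000080000000
     extend (0x80000000, SXTW)   == 0xffffffff80000000  */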
/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
		       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}
/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  The same applies to the register and extended
   register offset versions, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */
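/* For illustration, the addressing modes correspond to assembly forms
   such as (using a 32 bit load as the example):

     LDR  W1, [X2, #8]           scaled 12 bit unsigned immediate
     LDUR W1, [X2, #-3]          unscaled 9 bit signed immediate
     LDR  W1, [X2, #4]!          pre-index: X2 += 4, then load
     LDR  W1, [X2], #4           post-index: load, then X2 += 4
     LDR  W1, [X2, W3, SXTW #2]  extended register offset

   In the _wb helpers below a Pre writeback adds the offset before the
   access, a Post writeback adds it afterwards, and NoWriteBack leaves
   the base register unchanged.  */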
/* 32 bit load 32 bit scaled unsigned 12 bit  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u64 (cpu, address + displacement));
}
/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.
   No scaling is required for a byte load, so this simply reuses
   the writeback helper with writeback disabled.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}
/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
		       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16
		       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}
/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32)),
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}
/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 64),
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			     extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 16),
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}
/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}
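/* N.B. this simulator is single threaded, so it models the exclusive
   monitor trivially: LDXR above performs a plain load and STXR below
   always succeeds, writing 0 (success) into the status register rs.  */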
*/ 1573 } 1574 1575 static void 1576 dexLoadLiteral (sim_cpu *cpu) 1577 { 1578 /* instr[29,27] == 011 1579 instr[25,24] == 00 1580 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS 1581 010 ==> LDRX, 011 ==> FLDRD 1582 100 ==> LDRSW, 101 ==> FLDRQ 1583 110 ==> PRFM, 111 ==> UNALLOC 1584 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg 1585 instr[23, 5] == simm19 */ 1586 1587 /* unsigned rt = INSTR (4, 0); */ 1588 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26); 1589 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5); 1590 1591 switch (dispatch) 1592 { 1593 case 0: ldr32_pcrel (cpu, imm); break; 1594 case 1: fldrs_pcrel (cpu, imm); break; 1595 case 2: ldr_pcrel (cpu, imm); break; 1596 case 3: fldrd_pcrel (cpu, imm); break; 1597 case 4: ldrsw_pcrel (cpu, imm); break; 1598 case 5: fldrq_pcrel (cpu, imm); break; 1599 case 6: prfm_pcrel (cpu, imm); break; 1600 case 7: 1601 default: 1602 HALT_UNALLOC; 1603 } 1604 } 1605 1606 /* Immediate arithmetic 1607 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned 1608 value left shifted by 12 bits (done at decode). 1609 1610 N.B. the register args (dest, source) can normally be Xn or SP. 1611 the exception occurs for flag setting instructions which may 1612 only use Xn for the output (dest). */ 1613 1614 /* 32 bit add immediate. */ 1615 static void 1616 add32 (sim_cpu *cpu, uint32_t aimm) 1617 { 1618 unsigned rn = INSTR (9, 5); 1619 unsigned rd = INSTR (4, 0); 1620 1621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1622 aarch64_set_reg_u64 (cpu, rd, SP_OK, 1623 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm); 1624 } 1625 1626 /* 64 bit add immediate. */ 1627 static void 1628 add64 (sim_cpu *cpu, uint32_t aimm) 1629 { 1630 unsigned rn = INSTR (9, 5); 1631 unsigned rd = INSTR (4, 0); 1632 1633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1634 aarch64_set_reg_u64 (cpu, rd, SP_OK, 1635 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm); 1636 } 1637 1638 static void 1639 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2) 1640 { 1641 int32_t result = value1 + value2; 1642 int64_t sresult = (int64_t) value1 + (int64_t) value2; 1643 uint64_t uresult = (uint64_t)(uint32_t) value1 1644 + (uint64_t)(uint32_t) value2; 1645 uint32_t flags = 0; 1646 1647 if (result == 0) 1648 flags |= Z; 1649 1650 if (result & (1 << 31)) 1651 flags |= N; 1652 1653 if (uresult != (uint32_t)result) 1654 flags |= C; 1655 1656 if (sresult != result) 1657 flags |= V; 1658 1659 aarch64_set_CPSR (cpu, flags); 1660 } 1661 1662 #define NEG(a) (((a) & signbit) == signbit) 1663 #define POS(a) (((a) & signbit) == 0) 1664 1665 static void 1666 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2) 1667 { 1668 uint64_t result = value1 + value2; 1669 uint32_t flags = 0; 1670 uint64_t signbit = 1ULL << 63; 1671 1672 if (result == 0) 1673 flags |= Z; 1674 1675 if (NEG (result)) 1676 flags |= N; 1677 1678 if ( (NEG (value1) && NEG (value2)) 1679 || (NEG (value1) && POS (result)) 1680 || (NEG (value2) && POS (result))) 1681 flags |= C; 1682 1683 if ( (NEG (value1) && NEG (value2) && POS (result)) 1684 || (POS (value1) && POS (value2) && NEG (result))) 1685 flags |= V; 1686 1687 aarch64_set_CPSR (cpu, flags); 1688 } 1689 1690 static void 1691 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2) 1692 { 1693 uint32_t result = value1 - value2; 1694 uint32_t flags = 0; 1695 uint32_t signbit = 1U << 31; 1696 1697 if (result == 0) 1698 flags |= Z; 1699 1700 if (NEG (result)) 1701 flags |= N; 1702 1703 if ( (NEG (value1) && POS (value2)) 
#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

/* 32 bit add immediate set flags.  */
static void
adds32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : do we need to worry about signs here?  */
  int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
  set_flags_for_add32 (cpu, value1, aimm);
}

/* 64 bit add immediate set flags.  */
static void
adds64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit sub immediate.  */
static void
sub32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
}

/* 64 bit sub immediate.  */
static void
sub64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
}
/* 32 bit sub immediate set flags.  */
static void
subs32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit sub immediate set flags.  */
static void
subs64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}

/* Data Processing Register.  */

/* First two helpers to perform the shift operations.  */

static inline uint32_t
shifted32 (uint32_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
	int32_t svalue = value;
	return (svalue >> count);
      }
    case ROR:
      {
	/* N.B. a rotate count of zero is not expected here; it would
	   make the shift below undefined behaviour.  */
	uint32_t top = value >> count;
	uint32_t bottom = value << (32 - count);
	return (bottom | top);
      }
    }
}

static inline uint64_t
shifted64 (uint64_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
	int64_t svalue = value;
	return (svalue >> count);
      }
    case ROR:
      {
	uint64_t top = value >> count;
	uint64_t bottom = value << (64 - count);
	return (bottom | top);
      }
    }
}
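/* For illustration: shifted32 (0x80000001, ROR, 1) rotates the low bit
   into bit 31 and returns 0xc0000000; shifted32 (0x80000001, ASR, 1)
   also returns 0xc0000000 because the sign bit is replicated, whereas
   LSR would give 0x40000000.  */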
*/ 1945 static void 1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1947 { 1948 unsigned rm = INSTR (20, 16); 1949 unsigned rn = INSTR (9, 5); 1950 unsigned rd = INSTR (4, 0); 1951 1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 1954 shift, count); 1955 1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); 1958 set_flags_for_add32 (cpu, value1, value2); 1959 } 1960 1961 /* 64 bit ADD shifted register setting flags. */ 1962 static void 1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1964 { 1965 unsigned rm = INSTR (20, 16); 1966 unsigned rn = INSTR (9, 5); 1967 unsigned rd = INSTR (4, 0); 1968 1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 1971 shift, count); 1972 1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); 1975 set_flags_for_add64 (cpu, value1, value2); 1976 } 1977 1978 /* 32 bit SUB shifted register. */ 1979 static void 1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1981 { 1982 unsigned rm = INSTR (20, 16); 1983 unsigned rn = INSTR (9, 5); 1984 unsigned rd = INSTR (4, 0); 1985 1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1987 aarch64_set_reg_u64 (cpu, rd, NO_SP, 1988 aarch64_get_reg_u32 (cpu, rn, NO_SP) 1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 1990 shift, count)); 1991 } 1992 1993 /* 64 bit SUB shifted register. */ 1994 static void 1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1996 { 1997 unsigned rm = INSTR (20, 16); 1998 unsigned rn = INSTR (9, 5); 1999 unsigned rd = INSTR (4, 0); 2000 2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2002 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2003 aarch64_get_reg_u64 (cpu, rn, NO_SP) 2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 2005 shift, count)); 2006 } 2007 2008 /* 32 bit SUB shifted register setting flags. */ 2009 static void 2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 2011 { 2012 unsigned rm = INSTR (20, 16); 2013 unsigned rn = INSTR (9, 5); 2014 unsigned rd = INSTR (4, 0); 2015 2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 2018 shift, count); 2019 2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); 2022 set_flags_for_sub32 (cpu, value1, value2); 2023 } 2024 2025 /* 64 bit SUB shifted register setting flags. */ 2026 static void 2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 2028 { 2029 unsigned rm = INSTR (20, 16); 2030 unsigned rn = INSTR (9, 5); 2031 unsigned rd = INSTR (4, 0); 2032 2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 2035 shift, count); 2036 2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); 2039 set_flags_for_sub64 (cpu, value1, value2); 2040 } 2041 2042 /* First a couple more helpers to fetch the 2043 relevant source register element either 2044 sign or zero extended as required by the 2045 extension value. 
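   For example, UXTB of a register holding 0x1234 yields 0x34, while
   SXTB of a register holding 0x80 yields 0xFFFFFF80 for a 32 bit
   operation.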
*/

static uint32_t
extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
{
  switch (extension)
    {
    case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
    case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
    case UXTW: /* Fall through.  */
    case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
    case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
    case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
    case SXTW: /* Fall through.  */
    case SXTX: /* Fall through.  */
    default:   return aarch64_get_reg_s32 (cpu, lo, NO_SP);
    }
}

static uint64_t
extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
{
  switch (extension)
    {
    case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
    case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
    case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
    case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
    case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
    case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
    case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
    case SXTX:
    default:   return aarch64_get_reg_s64 (cpu, lo, NO_SP);
    }
}

/* Arithmetic extending register
   These allow an optional sign or zero extension of some portion of
   the second source register followed by an optional left shift of
   between 0 and 4 bits.

   N.B. output (dest) and first input arg (source) may normally be Xn
   or SP.  However, for flag setting operations dest can only be
   Xn.  Second input registers are always Xn.  */

/* 32 bit ADD extending register.  */
static void
add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK)
                       + (extreg32 (cpu, rm, extension) << shift));
}

/* 64 bit ADD extending register.
   N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
static void
add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + (extreg64 (cpu, rm, extension) << shift));
}

/* 32 bit ADD extending register setting flags.  */
static void
adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
  uint32_t value2 = extreg32 (cpu, rm, extension) << shift;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add32 (cpu, value1, value2);
}

/* 64 bit ADD extending register setting flags */
/* N.B.
this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */ 2137 static void 2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2139 { 2140 unsigned rm = INSTR (20, 16); 2141 unsigned rn = INSTR (9, 5); 2142 unsigned rd = INSTR (4, 0); 2143 2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); 2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift; 2146 2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); 2149 set_flags_for_add64 (cpu, value1, value2); 2150 } 2151 2152 /* 32 bit SUB extending register. */ 2153 static void 2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2155 { 2156 unsigned rm = INSTR (20, 16); 2157 unsigned rn = INSTR (9, 5); 2158 unsigned rd = INSTR (4, 0); 2159 2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2161 aarch64_set_reg_u64 (cpu, rd, SP_OK, 2162 aarch64_get_reg_u32 (cpu, rn, SP_OK) 2163 - (extreg32 (cpu, rm, extension) << shift)); 2164 } 2165 2166 /* 64 bit SUB extending register. */ 2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */ 2168 static void 2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2170 { 2171 unsigned rm = INSTR (20, 16); 2172 unsigned rn = INSTR (9, 5); 2173 unsigned rd = INSTR (4, 0); 2174 2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2176 aarch64_set_reg_u64 (cpu, rd, SP_OK, 2177 aarch64_get_reg_u64 (cpu, rn, SP_OK) 2178 - (extreg64 (cpu, rm, extension) << shift)); 2179 } 2180 2181 /* 32 bit SUB extending register setting flags. */ 2182 static void 2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2184 { 2185 unsigned rm = INSTR (20, 16); 2186 unsigned rn = INSTR (9, 5); 2187 unsigned rd = INSTR (4, 0); 2188 2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK); 2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift; 2191 2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); 2194 set_flags_for_sub32 (cpu, value1, value2); 2195 } 2196 2197 /* 64 bit SUB extending register setting flags */ 2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */ 2199 static void 2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2201 { 2202 unsigned rm = INSTR (20, 16); 2203 unsigned rn = INSTR (9, 5); 2204 unsigned rd = INSTR (4, 0); 2205 2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); 2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift; 2208 2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); 2211 set_flags_for_sub64 (cpu, value1, value2); 2212 } 2213 2214 static void 2215 dexAddSubtractImmediate (sim_cpu *cpu) 2216 { 2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB 2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags 2220 instr[28,24] = 10001 2221 instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC 2222 instr[21,10] = uimm12 2223 instr[9,5] = Rn 2224 instr[4,0] = Rd */ 2225 2226 /* N.B. the shift is applied at decode before calling the add/sub routine. 
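   For example, ADD X0, X1, #5, LSL #12 reaches add64 with
   imm == 0x5000.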
*/
  uint32_t shift = INSTR (23, 22);
  uint32_t imm = INSTR (21, 10);
  uint32_t dispatch = INSTR (31, 29);

  NYI_assert (28, 24, 0x11);

  if (shift > 1)
    HALT_UNALLOC;

  if (shift)
    imm <<= 12;

  switch (dispatch)
    {
    case 0: add32 (cpu, imm); break;
    case 1: adds32 (cpu, imm); break;
    case 2: sub32 (cpu, imm); break;
    case 3: subs32 (cpu, imm); break;
    case 4: add64 (cpu, imm); break;
    case 5: adds64 (cpu, imm); break;
    case 6: sub64 (cpu, imm); break;
    case 7: subs64 (cpu, imm); break;
    }
}

static void
dexAddSubtractShiftedRegister (sim_cpu *cpu)
{
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
     instr[28,24] = 01011
     instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
     instr[21]    = 0
     instr[20,16] = Rm
     instr[15,10] = count : must be 0xxxxx for 32 bit
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  uint32_t size = INSTR (31, 31);
  uint32_t count = INSTR (15, 10);
  Shift shiftType = INSTR (23, 22);

  NYI_assert (28, 24, 0x0B);
  NYI_assert (21, 21, 0);

  /* Shift encoded as ROR is unallocated.  */
  if (shiftType == ROR)
    HALT_UNALLOC;

  /* 32 bit operations must have count[5] = 0
     or else we have an UNALLOC.  */
  if (size == 0 && uimm (count, 5, 5))
    HALT_UNALLOC;

  /* Dispatch on size:op i.e instr [31,29].  */
  switch (INSTR (31, 29))
    {
    case 0: add32_shift (cpu, shiftType, count); break;
    case 1: adds32_shift (cpu, shiftType, count); break;
    case 2: sub32_shift (cpu, shiftType, count); break;
    case 3: subs32_shift (cpu, shiftType, count); break;
    case 4: add64_shift (cpu, shiftType, count); break;
    case 5: adds64_shift (cpu, shiftType, count); break;
    case 6: sub64_shift (cpu, shiftType, count); break;
    case 7: subs64_shift (cpu, shiftType, count); break;
    }
}

static void
dexAddSubtractExtendedRegister (sim_cpu *cpu)
{
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30]    = op : 0 ==> ADD, 1 ==> SUB
     instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
     instr[28,24] = 01011
     instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
     instr[21]    = 1
     instr[20,16] = Rm
     instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
                             010 ==> UXTW|LSL, 011 ==> UXTX,
                             100 ==> SXTB, 101 ==> SXTH,
                             110 ==> SXTW, 111 ==> SXTX
     instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  Extension extensionType = INSTR (15, 13);
  uint32_t shift = INSTR (12, 10);

  NYI_assert (28, 24, 0x0B);
  NYI_assert (21, 21, 1);

  /* Non-zero opt values are unallocated.  */
  if (INSTR (23, 22) != 0)
    HALT_UNALLOC;

  /* Shift may not exceed 4.  */
  if (shift > 4)
    HALT_UNALLOC;

  /* Dispatch on size:op:set?.
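     For example, instr[31,29] == 101 (64 bit ADD, setting flags)
     dispatches to adds64_ext.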
*/ 2324 switch (INSTR (31, 29)) 2325 { 2326 case 0: add32_ext (cpu, extensionType, shift); break; 2327 case 1: adds32_ext (cpu, extensionType, shift); break; 2328 case 2: sub32_ext (cpu, extensionType, shift); break; 2329 case 3: subs32_ext (cpu, extensionType, shift); break; 2330 case 4: add64_ext (cpu, extensionType, shift); break; 2331 case 5: adds64_ext (cpu, extensionType, shift); break; 2332 case 6: sub64_ext (cpu, extensionType, shift); break; 2333 case 7: subs64_ext (cpu, extensionType, shift); break; 2334 } 2335 } 2336 2337 /* Conditional data processing 2338 Condition register is implicit 3rd source. */ 2339 2340 /* 32 bit add with carry. */ 2341 /* N.B register args may not be SP. */ 2342 2343 static void 2344 adc32 (sim_cpu *cpu) 2345 { 2346 unsigned rm = INSTR (20, 16); 2347 unsigned rn = INSTR (9, 5); 2348 unsigned rd = INSTR (4, 0); 2349 2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2351 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2352 aarch64_get_reg_u32 (cpu, rn, NO_SP) 2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP) 2354 + IS_SET (C)); 2355 } 2356 2357 /* 64 bit add with carry */ 2358 static void 2359 adc64 (sim_cpu *cpu) 2360 { 2361 unsigned rm = INSTR (20, 16); 2362 unsigned rn = INSTR (9, 5); 2363 unsigned rd = INSTR (4, 0); 2364 2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2366 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2367 aarch64_get_reg_u64 (cpu, rn, NO_SP) 2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP) 2369 + IS_SET (C)); 2370 } 2371 2372 /* 32 bit add with carry setting flags. */ 2373 static void 2374 adcs32 (sim_cpu *cpu) 2375 { 2376 unsigned rm = INSTR (20, 16); 2377 unsigned rn = INSTR (9, 5); 2378 unsigned rd = INSTR (4, 0); 2379 2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); 2382 uint32_t carry = IS_SET (C); 2383 2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); 2386 set_flags_for_add32 (cpu, value1, value2 + carry); 2387 } 2388 2389 /* 64 bit add with carry setting flags. */ 2390 static void 2391 adcs64 (sim_cpu *cpu) 2392 { 2393 unsigned rm = INSTR (20, 16); 2394 unsigned rn = INSTR (9, 5); 2395 unsigned rd = INSTR (4, 0); 2396 2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); 2399 uint64_t carry = IS_SET (C); 2400 2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); 2403 set_flags_for_add64 (cpu, value1, value2 + carry); 2404 } 2405 2406 /* 32 bit sub with carry. */ 2407 static void 2408 sbc32 (sim_cpu *cpu) 2409 { 2410 unsigned rm = INSTR (20, 16); 2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. 
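     That is, NGC Wd, Wm is the preferred alias of SBC Wd, WZR, Wm.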
*/ 2412 unsigned rd = INSTR (4, 0); 2413 2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2415 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2416 aarch64_get_reg_u32 (cpu, rn, NO_SP) 2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP) 2418 - 1 + IS_SET (C)); 2419 } 2420 2421 /* 64 bit sub with carry */ 2422 static void 2423 sbc64 (sim_cpu *cpu) 2424 { 2425 unsigned rm = INSTR (20, 16); 2426 unsigned rn = INSTR (9, 5); 2427 unsigned rd = INSTR (4, 0); 2428 2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2430 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2431 aarch64_get_reg_u64 (cpu, rn, NO_SP) 2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP) 2433 - 1 + IS_SET (C)); 2434 } 2435 2436 /* 32 bit sub with carry setting flags */ 2437 static void 2438 sbcs32 (sim_cpu *cpu) 2439 { 2440 unsigned rm = INSTR (20, 16); 2441 unsigned rn = INSTR (9, 5); 2442 unsigned rd = INSTR (4, 0); 2443 2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); 2446 uint32_t carry = IS_SET (C); 2447 uint32_t result = value1 - value2 + 1 - carry; 2448 2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result); 2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry); 2452 } 2453 2454 /* 64 bit sub with carry setting flags */ 2455 static void 2456 sbcs64 (sim_cpu *cpu) 2457 { 2458 unsigned rm = INSTR (20, 16); 2459 unsigned rn = INSTR (9, 5); 2460 unsigned rd = INSTR (4, 0); 2461 2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); 2464 uint64_t carry = IS_SET (C); 2465 uint64_t result = value1 - value2 + 1 - carry; 2466 2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result); 2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry); 2470 } 2471 2472 static void 2473 dexAddSubtractWithCarry (sim_cpu *cpu) 2474 { 2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC 2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags 2478 instr[28,21] = 1 1010 000 2479 instr[20,16] = Rm 2480 instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC 2481 instr[9,5] = Rn 2482 instr[4,0] = Rd */ 2483 2484 uint32_t op2 = INSTR (15, 10); 2485 2486 NYI_assert (28, 21, 0xD0); 2487 2488 if (op2 != 0) 2489 HALT_UNALLOC; 2490 2491 /* Dispatch on size:op:set?. */ 2492 switch (INSTR (31, 29)) 2493 { 2494 case 0: adc32 (cpu); break; 2495 case 1: adcs32 (cpu); break; 2496 case 2: sbc32 (cpu); break; 2497 case 3: sbcs32 (cpu); break; 2498 case 4: adc64 (cpu); break; 2499 case 5: adcs64 (cpu); break; 2500 case 6: sbc64 (cpu); break; 2501 case 7: sbcs64 (cpu); break; 2502 } 2503 } 2504 2505 static uint32_t 2506 testConditionCode (sim_cpu *cpu, CondCode cc) 2507 { 2508 /* This should be reduceable to branchless logic 2509 by some careful testing of bits in CC followed 2510 by the requisite masking and combining of bits 2511 from the flag register. 2512 2513 For now we do it with a switch. 
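     N.B. the conditions come in complementary pairs: in the ARM ARM's
     ConditionHolds() the low bit of cc inverts the result computed
     from cc[3,1], except that NV (0b1111) behaves as AL.  That is the
     basis for a branchless version.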
*/ 2514 int res; 2515 2516 switch (cc) 2517 { 2518 case EQ: res = IS_SET (Z); break; 2519 case NE: res = IS_CLEAR (Z); break; 2520 case CS: res = IS_SET (C); break; 2521 case CC: res = IS_CLEAR (C); break; 2522 case MI: res = IS_SET (N); break; 2523 case PL: res = IS_CLEAR (N); break; 2524 case VS: res = IS_SET (V); break; 2525 case VC: res = IS_CLEAR (V); break; 2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break; 2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break; 2528 case GE: res = IS_SET (N) == IS_SET (V); break; 2529 case LT: res = IS_SET (N) != IS_SET (V); break; 2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break; 2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break; 2532 case AL: 2533 case NV: 2534 default: 2535 res = 1; 2536 break; 2537 } 2538 return res; 2539 } 2540 2541 static void 2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */ 2543 { 2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 2545 instr[30] = compare with positive (1) or negative value (0) 2546 instr[29,21] = 1 1101 0010 2547 instr[20,16] = Rm or const 2548 instr[15,12] = cond 2549 instr[11] = compare reg (0) or const (1) 2550 instr[10] = 0 2551 instr[9,5] = Rn 2552 instr[4] = 0 2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */ 2554 signed int negate; 2555 unsigned rm; 2556 unsigned rn; 2557 2558 NYI_assert (29, 21, 0x1d2); 2559 NYI_assert (10, 10, 0); 2560 NYI_assert (4, 4, 0); 2561 2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2563 if (! testConditionCode (cpu, INSTR (15, 12))) 2564 { 2565 aarch64_set_CPSR (cpu, INSTR (3, 0)); 2566 return; 2567 } 2568 2569 negate = INSTR (30, 30) ? 1 : -1; 2570 rm = INSTR (20, 16); 2571 rn = INSTR ( 9, 5); 2572 2573 if (INSTR (31, 31)) 2574 { 2575 if (INSTR (11, 11)) 2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), 2577 negate * (uint64_t) rm); 2578 else 2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), 2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK)); 2581 } 2582 else 2583 { 2584 if (INSTR (11, 11)) 2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), 2586 negate * rm); 2587 else 2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), 2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK)); 2590 } 2591 } 2592 2593 static void 2594 do_vec_MOV_whole_vector (sim_cpu *cpu) 2595 { 2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm) 2597 2598 instr[31] = 0 2599 instr[30] = half(0)/full(1) 2600 instr[29,21] = 001110101 2601 instr[20,16] = Vs 2602 instr[15,10] = 000111 2603 instr[9,5] = Vs 2604 instr[4,0] = Vd */ 2605 2606 unsigned vs = INSTR (9, 5); 2607 unsigned vd = INSTR (4, 0); 2608 2609 NYI_assert (29, 21, 0x075); 2610 NYI_assert (15, 10, 0x07); 2611 2612 if (INSTR (20, 16) != vs) 2613 HALT_NYI; 2614 2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2616 if (INSTR (30, 30)) 2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1)); 2618 2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0)); 2620 } 2621 2622 static void 2623 do_vec_SMOV_into_scalar (sim_cpu *cpu) 2624 { 2625 /* instr[31] = 0 2626 instr[30] = word(0)/long(1) 2627 instr[29,21] = 00 1110 000 2628 instr[20,16] = element size and index 2629 instr[15,10] = 00 0010 11 2630 instr[9,5] = V source 2631 instr[4,0] = R dest */ 2632 2633 unsigned vs = INSTR (9, 5); 2634 unsigned rd = INSTR (4, 0); 2635 unsigned imm5 = INSTR (20, 16); 2636 unsigned full = INSTR (30, 30); 2637 int size, index; 2638 2639 NYI_assert (29, 
21, 0x070); 2640 NYI_assert (15, 10, 0x0B); 2641 2642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2643 2644 if (imm5 & 0x1) 2645 { 2646 size = 0; 2647 index = (imm5 >> 1) & 0xF; 2648 } 2649 else if (imm5 & 0x2) 2650 { 2651 size = 1; 2652 index = (imm5 >> 2) & 0x7; 2653 } 2654 else if (full && (imm5 & 0x4)) 2655 { 2656 size = 2; 2657 index = (imm5 >> 3) & 0x3; 2658 } 2659 else 2660 HALT_UNALLOC; 2661 2662 switch (size) 2663 { 2664 case 0: 2665 if (full) 2666 aarch64_set_reg_s64 (cpu, rd, NO_SP, 2667 aarch64_get_vec_s8 (cpu, vs, index)); 2668 else 2669 aarch64_set_reg_s32 (cpu, rd, NO_SP, 2670 aarch64_get_vec_s8 (cpu, vs, index)); 2671 break; 2672 2673 case 1: 2674 if (full) 2675 aarch64_set_reg_s64 (cpu, rd, NO_SP, 2676 aarch64_get_vec_s16 (cpu, vs, index)); 2677 else 2678 aarch64_set_reg_s32 (cpu, rd, NO_SP, 2679 aarch64_get_vec_s16 (cpu, vs, index)); 2680 break; 2681 2682 case 2: 2683 aarch64_set_reg_s64 (cpu, rd, NO_SP, 2684 aarch64_get_vec_s32 (cpu, vs, index)); 2685 break; 2686 2687 default: 2688 HALT_UNALLOC; 2689 } 2690 } 2691 2692 static void 2693 do_vec_UMOV_into_scalar (sim_cpu *cpu) 2694 { 2695 /* instr[31] = 0 2696 instr[30] = word(0)/long(1) 2697 instr[29,21] = 00 1110 000 2698 instr[20,16] = element size and index 2699 instr[15,10] = 00 0011 11 2700 instr[9,5] = V source 2701 instr[4,0] = R dest */ 2702 2703 unsigned vs = INSTR (9, 5); 2704 unsigned rd = INSTR (4, 0); 2705 unsigned imm5 = INSTR (20, 16); 2706 unsigned full = INSTR (30, 30); 2707 int size, index; 2708 2709 NYI_assert (29, 21, 0x070); 2710 NYI_assert (15, 10, 0x0F); 2711 2712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2713 2714 if (!full) 2715 { 2716 if (imm5 & 0x1) 2717 { 2718 size = 0; 2719 index = (imm5 >> 1) & 0xF; 2720 } 2721 else if (imm5 & 0x2) 2722 { 2723 size = 1; 2724 index = (imm5 >> 2) & 0x7; 2725 } 2726 else if (imm5 & 0x4) 2727 { 2728 size = 2; 2729 index = (imm5 >> 3) & 0x3; 2730 } 2731 else 2732 HALT_UNALLOC; 2733 } 2734 else if (imm5 & 0x8) 2735 { 2736 size = 3; 2737 index = (imm5 >> 4) & 0x1; 2738 } 2739 else 2740 HALT_UNALLOC; 2741 2742 switch (size) 2743 { 2744 case 0: 2745 aarch64_set_reg_u32 (cpu, rd, NO_SP, 2746 aarch64_get_vec_u8 (cpu, vs, index)); 2747 break; 2748 2749 case 1: 2750 aarch64_set_reg_u32 (cpu, rd, NO_SP, 2751 aarch64_get_vec_u16 (cpu, vs, index)); 2752 break; 2753 2754 case 2: 2755 aarch64_set_reg_u32 (cpu, rd, NO_SP, 2756 aarch64_get_vec_u32 (cpu, vs, index)); 2757 break; 2758 2759 case 3: 2760 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2761 aarch64_get_vec_u64 (cpu, vs, index)); 2762 break; 2763 2764 default: 2765 HALT_UNALLOC; 2766 } 2767 } 2768 2769 static void 2770 do_vec_INS (sim_cpu *cpu) 2771 { 2772 /* instr[31,21] = 01001110000 2773 instr[20,16] = element size and index 2774 instr[15,10] = 000111 2775 instr[9,5] = W source 2776 instr[4,0] = V dest */ 2777 2778 int index; 2779 unsigned rs = INSTR (9, 5); 2780 unsigned vd = INSTR (4, 0); 2781 2782 NYI_assert (31, 21, 0x270); 2783 NYI_assert (15, 10, 0x07); 2784 2785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2786 if (INSTR (16, 16)) 2787 { 2788 index = INSTR (20, 17); 2789 aarch64_set_vec_u8 (cpu, vd, index, 2790 aarch64_get_reg_u8 (cpu, rs, NO_SP)); 2791 } 2792 else if (INSTR (17, 17)) 2793 { 2794 index = INSTR (20, 18); 2795 aarch64_set_vec_u16 (cpu, vd, index, 2796 aarch64_get_reg_u16 (cpu, rs, NO_SP)); 2797 } 2798 else if (INSTR (18, 18)) 2799 { 2800 index = INSTR (20, 19); 2801 aarch64_set_vec_u32 (cpu, vd, index, 2802 aarch64_get_reg_u32 (cpu, rs, NO_SP)); 2803 } 2804 else if (INSTR (19, 
19)) 2805 { 2806 index = INSTR (20, 20); 2807 aarch64_set_vec_u64 (cpu, vd, index, 2808 aarch64_get_reg_u64 (cpu, rs, NO_SP)); 2809 } 2810 else 2811 HALT_NYI; 2812 } 2813 2814 static void 2815 do_vec_DUP_vector_into_vector (sim_cpu *cpu) 2816 { 2817 /* instr[31] = 0 2818 instr[30] = half(0)/full(1) 2819 instr[29,21] = 00 1110 000 2820 instr[20,16] = element size and index 2821 instr[15,10] = 0000 01 2822 instr[9,5] = V source 2823 instr[4,0] = V dest. */ 2824 2825 unsigned full = INSTR (30, 30); 2826 unsigned vs = INSTR (9, 5); 2827 unsigned vd = INSTR (4, 0); 2828 int i, index; 2829 2830 NYI_assert (29, 21, 0x070); 2831 NYI_assert (15, 10, 0x01); 2832 2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2834 if (INSTR (16, 16)) 2835 { 2836 index = INSTR (20, 17); 2837 2838 for (i = 0; i < (full ? 16 : 8); i++) 2839 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index)); 2840 } 2841 else if (INSTR (17, 17)) 2842 { 2843 index = INSTR (20, 18); 2844 2845 for (i = 0; i < (full ? 8 : 4); i++) 2846 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index)); 2847 } 2848 else if (INSTR (18, 18)) 2849 { 2850 index = INSTR (20, 19); 2851 2852 for (i = 0; i < (full ? 4 : 2); i++) 2853 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index)); 2854 } 2855 else 2856 { 2857 if (INSTR (19, 19) == 0) 2858 HALT_UNALLOC; 2859 2860 if (! full) 2861 HALT_UNALLOC; 2862 2863 index = INSTR (20, 20); 2864 2865 for (i = 0; i < 2; i++) 2866 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index)); 2867 } 2868 } 2869 2870 static void 2871 do_vec_TBL (sim_cpu *cpu) 2872 { 2873 /* instr[31] = 0 2874 instr[30] = half(0)/full(1) 2875 instr[29,21] = 00 1110 000 2876 instr[20,16] = Vm 2877 instr[15] = 0 2878 instr[14,13] = vec length 2879 instr[12,10] = 000 2880 instr[9,5] = V start 2881 instr[4,0] = V dest */ 2882 2883 int full = INSTR (30, 30); 2884 int len = INSTR (14, 13) + 1; 2885 unsigned vm = INSTR (20, 16); 2886 unsigned vn = INSTR (9, 5); 2887 unsigned vd = INSTR (4, 0); 2888 unsigned i; 2889 2890 NYI_assert (29, 21, 0x070); 2891 NYI_assert (12, 10, 0); 2892 2893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2894 for (i = 0; i < (full ? 16 : 8); i++) 2895 { 2896 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i); 2897 uint8_t val; 2898 2899 if (selector < 16) 2900 val = aarch64_get_vec_u8 (cpu, vn, selector); 2901 else if (selector < 32) 2902 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16); 2903 else if (selector < 48) 2904 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32); 2905 else if (selector < 64) 2906 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48); 2907 else 2908 val = 0; 2909 2910 aarch64_set_vec_u8 (cpu, vd, i, val); 2911 } 2912 } 2913 2914 static void 2915 do_vec_TRN (sim_cpu *cpu) 2916 { 2917 /* instr[31] = 0 2918 instr[30] = half(0)/full(1) 2919 instr[29,24] = 00 1110 2920 instr[23,22] = size 2921 instr[21] = 0 2922 instr[20,16] = Vm 2923 instr[15] = 0 2924 instr[14] = TRN1 (0) / TRN2 (1) 2925 instr[13,10] = 1010 2926 instr[9,5] = V source 2927 instr[4,0] = V dest. */ 2928 2929 int full = INSTR (30, 30); 2930 int second = INSTR (14, 14); 2931 unsigned vm = INSTR (20, 16); 2932 unsigned vn = INSTR (9, 5); 2933 unsigned vd = INSTR (4, 0); 2934 unsigned i; 2935 2936 NYI_assert (29, 24, 0x0E); 2937 NYI_assert (13, 10, 0xA); 2938 2939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2940 switch (INSTR (23, 22)) 2941 { 2942 case 0: 2943 for (i = 0; i < (full ? 
8 : 4); i++)
        {
          /* TRN1 reads the even numbered elements (2*i) of Vn and Vm,
             TRN2 the odd numbered ones (2*i + 1).  Read both sources
             before writing in case Vd aliases one of them.  */
          uint8_t nval = aarch64_get_vec_u8 (cpu, vn, i * 2 + second);
          uint8_t mval = aarch64_get_vec_u8 (cpu, vm, i * 2 + second);

          aarch64_set_vec_u8 (cpu, vd, i * 2, nval);
          aarch64_set_vec_u8 (cpu, vd, i * 2 + 1, mval);
        }
      break;

    case 1:
      for (i = 0; i < (full ? 4 : 2); i++)
        {
          uint16_t nval = aarch64_get_vec_u16 (cpu, vn, i * 2 + second);
          uint16_t mval = aarch64_get_vec_u16 (cpu, vm, i * 2 + second);

          aarch64_set_vec_u16 (cpu, vd, i * 2, nval);
          aarch64_set_vec_u16 (cpu, vd, i * 2 + 1, mval);
        }
      break;

    case 2:
      for (i = 0; i < (full ? 2 : 1); i++)
        {
          uint32_t nval = aarch64_get_vec_u32 (cpu, vn, i * 2 + second);
          uint32_t mval = aarch64_get_vec_u32 (cpu, vm, i * 2 + second);

          aarch64_set_vec_u32 (cpu, vd, i * 2, nval);
          aarch64_set_vec_u32 (cpu, vd, i * 2 + 1, mval);
        }
      break;

    case 3:
      {
        uint64_t nval, mval;

        if (! full)
          HALT_UNALLOC;

        nval = aarch64_get_vec_u64 (cpu, vn, second);
        mval = aarch64_get_vec_u64 (cpu, vm, second);
        aarch64_set_vec_u64 (cpu, vd, 0, nval);
        aarch64_set_vec_u64 (cpu, vd, 1, mval);
      }
      break;
    }
}

static void
do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
                    [must be 1 for 64-bit xfer]
     instr[29,20] = 00 1110 0000
     instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
                    0100=> 32-bits. 1000=>64-bits
     instr[15,10] = 0000 11
     instr[9,5]   = W source
     instr[4,0]   = V dest.  */

  unsigned i;
  unsigned Vd = INSTR (4, 0);
  unsigned Rs = INSTR (9, 5);
  int both = INSTR (30, 30);

  NYI_assert (29, 20, 0x0E0);
  NYI_assert (15, 10, 0x03);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (19, 16))
    {
    case 1:
      for (i = 0; i < (both ? 16 : 8); i++)
        aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
      break;

    case 2:
      for (i = 0; i < (both ? 8 : 4); i++)
        aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
      break;

    case 4:
      for (i = 0; i < (both ? 4 : 2); i++)
        aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
      break;

    case 8:
      if (!both)
        HALT_NYI;
      aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
      aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
      break;

    default:
      HALT_NYI;
    }
}

static void
do_vec_UZP (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = half(0)/full(1)
     instr[29,24] = 00 1110
     instr[23,22] = size: byte(00), half(01), word (10), long (11)
     instr[21]    = 0
     instr[20,16] = Vm
     instr[15]    = 0
     instr[14]    = lower (0) / upper (1)
     instr[13,10] = 0110
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */

  int full = INSTR (30, 30);
  int upper = INSTR (14, 14);

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
  uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
  uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
  uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);

  uint64_t val1;
  uint64_t val2;

  uint64_t input2 = full ?
val_n2 : val_m1;

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 0);
  NYI_assert (15, 15, 0);
  NYI_assert (13, 10, 6);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
    {
    case 0:
      val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
      val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
      val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
      val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;

      val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
      val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
      val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
      val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;

      if (full)
        {
          val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
          val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
          val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
          val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;

          val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
          val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
          val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
          val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
        }
      break;

    case 1:
      val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
      val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;

      val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
      val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;

      if (full)
        {
          val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
          val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;

          val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
          val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
        }
      break;

    case 2:
      val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
      val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;

      if (full)
        {
          val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
          val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
        }
      break;

    case 3:
      if (! full)
        HALT_UNALLOC;

      val1 = upper ? val_n2 : val_n1;
      val2 = upper ? val_m2 : val_m1;
      break;
    }

  aarch64_set_vec_u64 (cpu, vd, 0, val1);
  if (full)
    aarch64_set_vec_u64 (cpu, vd, 1, val2);
}

static void
do_vec_ZIP (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = half(0)/full(1)
     instr[29,24] = 00 1110
     instr[23,22] = size: byte(00), half(01), word (10), long (11)
     instr[21]    = 0
     instr[20,16] = Vm
     instr[15]    = 0
     instr[14]    = lower (0) / upper (1)
     instr[13,10] = 1110
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */

  int full = INSTR (30, 30);
  int upper = INSTR (14, 14);

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
  uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
  uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
  uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);

  uint64_t val1 = 0;
  uint64_t val2 = 0;

  /* ZIP1 interleaves the lower halves of Vn and Vm, ZIP2 the upper
     halves; input1 is the selected half of Vn and input2 the selected
     half of Vm.  */
  uint64_t input1 = upper ? val_n2 : val_n1;
  uint64_t input2 = upper ?
val_m2 : val_m1;

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 0);
  NYI_assert (15, 15, 0);
  NYI_assert (13, 10, 0xE);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
    {
    case 0:
      val1 =
          ((input1 << 0)  & (0xFF << 0))
        | ((input2 << 8)  & (0xFF << 8))
        | ((input1 << 8)  & (0xFF << 16))
        | ((input2 << 16) & (0xFFULL << 24))
        | ((input1 << 16) & (0xFFULL << 32))
        | ((input2 << 24) & (0xFFULL << 40))
        | ((input1 << 24) & (0xFFULL << 48))
        | ((input2 << 32) & (0xFFULL << 56));

      val2 =
          ((input1 >> 32) & (0xFF << 0))
        | ((input2 >> 24) & (0xFF << 8))
        | ((input1 >> 24) & (0xFF << 16))
        | ((input2 >> 16) & (0xFFULL << 24))
        | ((input1 >> 16) & (0xFFULL << 32))
        | ((input2 >> 8)  & (0xFFULL << 40))
        | ((input1 >> 8)  & (0xFFULL << 48))
        | ((input2 >> 0)  & (0xFFULL << 56));
      break;

    case 1:
      val1 =
          ((input1 << 0)  & (0xFFFF << 0))
        | ((input2 << 16) & (0xFFFFULL << 16))
        | ((input1 << 16) & (0xFFFFULL << 32))
        | ((input2 << 32) & (0xFFFFULL << 48));

      val2 =
          ((input1 >> 32) & (0xFFFF << 0))
        | ((input2 >> 16) & (0xFFFFULL << 16))
        | ((input1 >> 16) & (0xFFFFULL << 32))
        | ((input2 >> 0)  & (0xFFFFULL << 48));
      break;

    case 2:
      val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
      val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
      break;

    case 3:
      if (! full)
        HALT_UNALLOC;

      val1 = input1;
      val2 = input2;
      break;
    }

  aarch64_set_vec_u64 (cpu, vd, 0, val1);
  if (full)
    aarch64_set_vec_u64 (cpu, vd, 1, val2);
}

/* Floating point immediates are encoded in 8 bits.
   fpimm[7] = sign bit.
   fpimm[6:4] = signed exponent.
   fpimm[3:0] = fraction (assuming leading 1).
   i.e. F = s * 1.f * 2^(e - b).  */

static float
fp_immediate_for_encoding_32 (uint32_t imm8)
{
  float u;
  uint32_t s, e, f, i;

  s = (imm8 >> 7) & 0x1;
  e = (imm8 >> 4) & 0x7;
  f = imm8 & 0xf;

  /* The fp value is (-1)^s * (16 + f)/16 * 2^r, where the exponent r
     is decoded from the signed 3 bit field e.  */
  u = (16.0 + f) / 16.0;

  /* N.B. exponent is signed.  */
  if (e < 4)
    {
      int epos = e;

      for (i = 0; i <= epos; i++)
        u *= 2.0;
    }
  else
    {
      int eneg = 7 - e;

      for (i = 0; i < eneg; i++)
        u /= 2.0;
    }

  if (s)
    u = - u;

  return u;
}

static double
fp_immediate_for_encoding_64 (uint32_t imm8)
{
  double u;
  uint32_t s, e, f, i;

  s = (imm8 >> 7) & 0x1;
  e = (imm8 >> 4) & 0x7;
  f = imm8 & 0xf;

  /* The fp value is (-1)^s * (16 + f)/16 * 2^r, where the exponent r
     is decoded from the signed 3 bit field e.  */
  u = (16.0 + f) / 16.0;

  /* N.B. exponent is signed.
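     For example, imm8 == 0x00 (s=0, e=0, f=0) expands to 2.0, and
     imm8 == 0x70 (s=0, e=7, f=0) expands to 1.0.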
*/ 3295 if (e < 4) 3296 { 3297 int epos = e; 3298 3299 for (i = 0; i <= epos; i++) 3300 u *= 2.0; 3301 } 3302 else 3303 { 3304 int eneg = 7 - e; 3305 3306 for (i = 0; i < eneg; i++) 3307 u /= 2.0; 3308 } 3309 3310 if (s) 3311 u = - u; 3312 3313 return u; 3314 } 3315 3316 static void 3317 do_vec_MOV_immediate (sim_cpu *cpu) 3318 { 3319 /* instr[31] = 0 3320 instr[30] = full/half selector 3321 instr[29,19] = 00111100000 3322 instr[18,16] = high 3 bits of uimm8 3323 instr[15,12] = size & shift: 3324 0000 => 32-bit 3325 0010 => 32-bit + LSL#8 3326 0100 => 32-bit + LSL#16 3327 0110 => 32-bit + LSL#24 3328 1010 => 16-bit + LSL#8 3329 1000 => 16-bit 3330 1101 => 32-bit + MSL#16 3331 1100 => 32-bit + MSL#8 3332 1110 => 8-bit 3333 1111 => double 3334 instr[11,10] = 01 3335 instr[9,5] = low 5-bits of uimm8 3336 instr[4,0] = Vd. */ 3337 3338 int full = INSTR (30, 30); 3339 unsigned vd = INSTR (4, 0); 3340 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5); 3341 unsigned i; 3342 3343 NYI_assert (29, 19, 0x1E0); 3344 NYI_assert (11, 10, 1); 3345 3346 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3347 switch (INSTR (15, 12)) 3348 { 3349 case 0x0: /* 32-bit, no shift. */ 3350 case 0x2: /* 32-bit, shift by 8. */ 3351 case 0x4: /* 32-bit, shift by 16. */ 3352 case 0x6: /* 32-bit, shift by 24. */ 3353 val <<= (8 * INSTR (14, 13)); 3354 for (i = 0; i < (full ? 4 : 2); i++) 3355 aarch64_set_vec_u32 (cpu, vd, i, val); 3356 break; 3357 3358 case 0xa: /* 16-bit, shift by 8. */ 3359 val <<= 8; 3360 /* Fall through. */ 3361 case 0x8: /* 16-bit, no shift. */ 3362 for (i = 0; i < (full ? 8 : 4); i++) 3363 aarch64_set_vec_u16 (cpu, vd, i, val); 3364 break; 3365 3366 case 0xd: /* 32-bit, mask shift by 16. */ 3367 val <<= 8; 3368 val |= 0xFF; 3369 /* Fall through. */ 3370 case 0xc: /* 32-bit, mask shift by 8. */ 3371 val <<= 8; 3372 val |= 0xFF; 3373 for (i = 0; i < (full ? 4 : 2); i++) 3374 aarch64_set_vec_u32 (cpu, vd, i, val); 3375 break; 3376 3377 case 0xe: /* 8-bit, no shift. */ 3378 for (i = 0; i < (full ? 16 : 8); i++) 3379 aarch64_set_vec_u8 (cpu, vd, i, val); 3380 break; 3381 3382 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */ 3383 { 3384 float u = fp_immediate_for_encoding_32 (val); 3385 for (i = 0; i < (full ? 4 : 2); i++) 3386 aarch64_set_vec_float (cpu, vd, i, u); 3387 break; 3388 } 3389 3390 default: 3391 HALT_NYI; 3392 } 3393 } 3394 3395 static void 3396 do_vec_MVNI (sim_cpu *cpu) 3397 { 3398 /* instr[31] = 0 3399 instr[30] = full/half selector 3400 instr[29,19] = 10111100000 3401 instr[18,16] = high 3 bits of uimm8 3402 instr[15,12] = selector 3403 instr[11,10] = 01 3404 instr[9,5] = low 5-bits of uimm8 3405 instr[4,0] = Vd. */ 3406 3407 int full = INSTR (30, 30); 3408 unsigned vd = INSTR (4, 0); 3409 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5); 3410 unsigned i; 3411 3412 NYI_assert (29, 19, 0x5E0); 3413 NYI_assert (11, 10, 1); 3414 3415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3416 switch (INSTR (15, 12)) 3417 { 3418 case 0x0: /* 32-bit, no shift. */ 3419 case 0x2: /* 32-bit, shift by 8. */ 3420 case 0x4: /* 32-bit, shift by 16. */ 3421 case 0x6: /* 32-bit, shift by 24. */ 3422 val <<= (8 * INSTR (14, 13)); 3423 val = ~ val; 3424 for (i = 0; i < (full ? 4 : 2); i++) 3425 aarch64_set_vec_u32 (cpu, vd, i, val); 3426 return; 3427 3428 case 0xa: /* 16-bit, 8 bit shift. */ 3429 val <<= 8; 3430 case 0x8: /* 16-bit, no shift. */ 3431 val = ~ val; 3432 for (i = 0; i < (full ? 
8 : 4); i++) 3433 aarch64_set_vec_u16 (cpu, vd, i, val); 3434 return; 3435 3436 case 0xd: /* 32-bit, mask shift by 16. */ 3437 val <<= 8; 3438 val |= 0xFF; 3439 case 0xc: /* 32-bit, mask shift by 8. */ 3440 val <<= 8; 3441 val |= 0xFF; 3442 val = ~ val; 3443 for (i = 0; i < (full ? 4 : 2); i++) 3444 aarch64_set_vec_u32 (cpu, vd, i, val); 3445 return; 3446 3447 case 0xE: /* MOVI Dn, #mask64 */ 3448 { 3449 uint64_t mask = 0; 3450 3451 for (i = 0; i < 8; i++) 3452 if (val & (1 << i)) 3453 mask |= (0xFFUL << (i * 8)); 3454 aarch64_set_vec_u64 (cpu, vd, 0, mask); 3455 aarch64_set_vec_u64 (cpu, vd, 1, mask); 3456 return; 3457 } 3458 3459 case 0xf: /* FMOV Vd.2D, #fpimm. */ 3460 { 3461 double u = fp_immediate_for_encoding_64 (val); 3462 3463 if (! full) 3464 HALT_UNALLOC; 3465 3466 aarch64_set_vec_double (cpu, vd, 0, u); 3467 aarch64_set_vec_double (cpu, vd, 1, u); 3468 return; 3469 } 3470 3471 default: 3472 HALT_NYI; 3473 } 3474 } 3475 3476 #define ABS(A) ((A) < 0 ? - (A) : (A)) 3477 3478 static void 3479 do_vec_ABS (sim_cpu *cpu) 3480 { 3481 /* instr[31] = 0 3482 instr[30] = half(0)/full(1) 3483 instr[29,24] = 00 1110 3484 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit 3485 instr[21,10] = 10 0000 1011 10 3486 instr[9,5] = Vn 3487 instr[4.0] = Vd. */ 3488 3489 unsigned vn = INSTR (9, 5); 3490 unsigned vd = INSTR (4, 0); 3491 unsigned full = INSTR (30, 30); 3492 unsigned i; 3493 3494 NYI_assert (29, 24, 0x0E); 3495 NYI_assert (21, 10, 0x82E); 3496 3497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3498 switch (INSTR (23, 22)) 3499 { 3500 case 0: 3501 for (i = 0; i < (full ? 16 : 8); i++) 3502 aarch64_set_vec_s8 (cpu, vd, i, 3503 ABS (aarch64_get_vec_s8 (cpu, vn, i))); 3504 break; 3505 3506 case 1: 3507 for (i = 0; i < (full ? 8 : 4); i++) 3508 aarch64_set_vec_s16 (cpu, vd, i, 3509 ABS (aarch64_get_vec_s16 (cpu, vn, i))); 3510 break; 3511 3512 case 2: 3513 for (i = 0; i < (full ? 4 : 2); i++) 3514 aarch64_set_vec_s32 (cpu, vd, i, 3515 ABS (aarch64_get_vec_s32 (cpu, vn, i))); 3516 break; 3517 3518 case 3: 3519 if (! full) 3520 HALT_NYI; 3521 for (i = 0; i < 2; i++) 3522 aarch64_set_vec_s64 (cpu, vd, i, 3523 ABS (aarch64_get_vec_s64 (cpu, vn, i))); 3524 break; 3525 } 3526 } 3527 3528 static void 3529 do_vec_ADDV (sim_cpu *cpu) 3530 { 3531 /* instr[31] = 0 3532 instr[30] = full/half selector 3533 instr[29,24] = 00 1110 3534 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit 3535 instr[21,10] = 11 0001 1011 10 3536 instr[9,5] = Vm 3537 instr[4.0] = Rd. */ 3538 3539 unsigned vm = INSTR (9, 5); 3540 unsigned rd = INSTR (4, 0); 3541 unsigned i; 3542 int full = INSTR (30, 30); 3543 3544 NYI_assert (29, 24, 0x0E); 3545 NYI_assert (21, 10, 0xC6E); 3546 3547 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3548 switch (INSTR (23, 22)) 3549 { 3550 case 0: 3551 { 3552 uint8_t val = 0; 3553 for (i = 0; i < (full ? 16 : 8); i++) 3554 val += aarch64_get_vec_u8 (cpu, vm, i); 3555 aarch64_set_vec_u64 (cpu, rd, 0, val); 3556 return; 3557 } 3558 3559 case 1: 3560 { 3561 uint16_t val = 0; 3562 for (i = 0; i < (full ? 8 : 4); i++) 3563 val += aarch64_get_vec_u16 (cpu, vm, i); 3564 aarch64_set_vec_u64 (cpu, rd, 0, val); 3565 return; 3566 } 3567 3568 case 2: 3569 { 3570 uint32_t val = 0; 3571 if (! 
full) 3572 HALT_UNALLOC; 3573 for (i = 0; i < 4; i++) 3574 val += aarch64_get_vec_u32 (cpu, vm, i); 3575 aarch64_set_vec_u64 (cpu, rd, 0, val); 3576 return; 3577 } 3578 3579 case 3: 3580 HALT_UNALLOC; 3581 } 3582 } 3583 3584 static void 3585 do_vec_ins_2 (sim_cpu *cpu) 3586 { 3587 /* instr[31,21] = 01001110000 3588 instr[20,18] = size & element selector 3589 instr[17,14] = 0000 3590 instr[13] = direction: to vec(0), from vec (1) 3591 instr[12,10] = 111 3592 instr[9,5] = Vm 3593 instr[4,0] = Vd. */ 3594 3595 unsigned elem; 3596 unsigned vm = INSTR (9, 5); 3597 unsigned vd = INSTR (4, 0); 3598 3599 NYI_assert (31, 21, 0x270); 3600 NYI_assert (17, 14, 0); 3601 NYI_assert (12, 10, 7); 3602 3603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3604 if (INSTR (13, 13) == 1) 3605 { 3606 if (INSTR (18, 18) == 1) 3607 { 3608 /* 32-bit moves. */ 3609 elem = INSTR (20, 19); 3610 aarch64_set_reg_u64 (cpu, vd, NO_SP, 3611 aarch64_get_vec_u32 (cpu, vm, elem)); 3612 } 3613 else 3614 { 3615 /* 64-bit moves. */ 3616 if (INSTR (19, 19) != 1) 3617 HALT_NYI; 3618 3619 elem = INSTR (20, 20); 3620 aarch64_set_reg_u64 (cpu, vd, NO_SP, 3621 aarch64_get_vec_u64 (cpu, vm, elem)); 3622 } 3623 } 3624 else 3625 { 3626 if (INSTR (18, 18) == 1) 3627 { 3628 /* 32-bit moves. */ 3629 elem = INSTR (20, 19); 3630 aarch64_set_vec_u32 (cpu, vd, elem, 3631 aarch64_get_reg_u32 (cpu, vm, NO_SP)); 3632 } 3633 else 3634 { 3635 /* 64-bit moves. */ 3636 if (INSTR (19, 19) != 1) 3637 HALT_NYI; 3638 3639 elem = INSTR (20, 20); 3640 aarch64_set_vec_u64 (cpu, vd, elem, 3641 aarch64_get_reg_u64 (cpu, vm, NO_SP)); 3642 } 3643 } 3644 } 3645 3646 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \ 3647 do \ 3648 { \ 3649 DST_TYPE a[N], b[N]; \ 3650 \ 3651 for (i = 0; i < (N); i++) \ 3652 { \ 3653 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \ 3654 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \ 3655 } \ 3656 for (i = 0; i < (N); i++) \ 3657 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \ 3658 } \ 3659 while (0) 3660 3661 static void 3662 do_vec_mull (sim_cpu *cpu) 3663 { 3664 /* instr[31] = 0 3665 instr[30] = lower(0)/upper(1) selector 3666 instr[29] = signed(0)/unsigned(1) 3667 instr[28,24] = 0 1110 3668 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10) 3669 instr[21] = 1 3670 instr[20,16] = Vm 3671 instr[15,10] = 11 0000 3672 instr[9,5] = Vn 3673 instr[4.0] = Vd. */ 3674 3675 int unsign = INSTR (29, 29); 3676 int bias = INSTR (30, 30); 3677 unsigned vm = INSTR (20, 16); 3678 unsigned vn = INSTR ( 9, 5); 3679 unsigned vd = INSTR ( 4, 0); 3680 unsigned i; 3681 3682 NYI_assert (28, 24, 0x0E); 3683 NYI_assert (15, 10, 0x30); 3684 3685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3686 /* NB: Read source values before writing results, in case 3687 the source and destination vectors are the same. 
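     The bias is the element offset of the upper half of each source
     vector: zero for the SMULL/UMULL forms, half the vector length
     for the second-part SMULL2/UMULL2 forms.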
*/ 3688 switch (INSTR (23, 22)) 3689 { 3690 case 0: 3691 if (bias) 3692 bias = 8; 3693 if (unsign) 3694 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16); 3695 else 3696 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16); 3697 return; 3698 3699 case 1: 3700 if (bias) 3701 bias = 4; 3702 if (unsign) 3703 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32); 3704 else 3705 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32); 3706 return; 3707 3708 case 2: 3709 if (bias) 3710 bias = 2; 3711 if (unsign) 3712 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64); 3713 else 3714 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64); 3715 return; 3716 3717 case 3: 3718 HALT_NYI; 3719 } 3720 } 3721 3722 static void 3723 do_vec_fadd (sim_cpu *cpu) 3724 { 3725 /* instr[31] = 0 3726 instr[30] = half(0)/full(1) 3727 instr[29,24] = 001110 3728 instr[23] = FADD(0)/FSUB(1) 3729 instr[22] = float (0)/double(1) 3730 instr[21] = 1 3731 instr[20,16] = Vm 3732 instr[15,10] = 110101 3733 instr[9,5] = Vn 3734 instr[4.0] = Vd. */ 3735 3736 unsigned vm = INSTR (20, 16); 3737 unsigned vn = INSTR (9, 5); 3738 unsigned vd = INSTR (4, 0); 3739 unsigned i; 3740 int full = INSTR (30, 30); 3741 3742 NYI_assert (29, 24, 0x0E); 3743 NYI_assert (21, 21, 1); 3744 NYI_assert (15, 10, 0x35); 3745 3746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3747 if (INSTR (23, 23)) 3748 { 3749 if (INSTR (22, 22)) 3750 { 3751 if (! full) 3752 HALT_NYI; 3753 3754 for (i = 0; i < 2; i++) 3755 aarch64_set_vec_double (cpu, vd, i, 3756 aarch64_get_vec_double (cpu, vn, i) 3757 - aarch64_get_vec_double (cpu, vm, i)); 3758 } 3759 else 3760 { 3761 for (i = 0; i < (full ? 4 : 2); i++) 3762 aarch64_set_vec_float (cpu, vd, i, 3763 aarch64_get_vec_float (cpu, vn, i) 3764 - aarch64_get_vec_float (cpu, vm, i)); 3765 } 3766 } 3767 else 3768 { 3769 if (INSTR (22, 22)) 3770 { 3771 if (! full) 3772 HALT_NYI; 3773 3774 for (i = 0; i < 2; i++) 3775 aarch64_set_vec_double (cpu, vd, i, 3776 aarch64_get_vec_double (cpu, vm, i) 3777 + aarch64_get_vec_double (cpu, vn, i)); 3778 } 3779 else 3780 { 3781 for (i = 0; i < (full ? 4 : 2); i++) 3782 aarch64_set_vec_float (cpu, vd, i, 3783 aarch64_get_vec_float (cpu, vm, i) 3784 + aarch64_get_vec_float (cpu, vn, i)); 3785 } 3786 } 3787 } 3788 3789 static void 3790 do_vec_add (sim_cpu *cpu) 3791 { 3792 /* instr[31] = 0 3793 instr[30] = full/half selector 3794 instr[29,24] = 001110 3795 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit 3796 instr[21] = 1 3797 instr[20,16] = Vn 3798 instr[15,10] = 100001 3799 instr[9,5] = Vm 3800 instr[4.0] = Vd. */ 3801 3802 unsigned vm = INSTR (20, 16); 3803 unsigned vn = INSTR (9, 5); 3804 unsigned vd = INSTR (4, 0); 3805 unsigned i; 3806 int full = INSTR (30, 30); 3807 3808 NYI_assert (29, 24, 0x0E); 3809 NYI_assert (21, 21, 1); 3810 NYI_assert (15, 10, 0x21); 3811 3812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3813 switch (INSTR (23, 22)) 3814 { 3815 case 0: 3816 for (i = 0; i < (full ? 16 : 8); i++) 3817 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) 3818 + aarch64_get_vec_u8 (cpu, vm, i)); 3819 return; 3820 3821 case 1: 3822 for (i = 0; i < (full ? 8 : 4); i++) 3823 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) 3824 + aarch64_get_vec_u16 (cpu, vm, i)); 3825 return; 3826 3827 case 2: 3828 for (i = 0; i < (full ? 4 : 2); i++) 3829 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) 3830 + aarch64_get_vec_u32 (cpu, vm, i)); 3831 return; 3832 3833 case 3: 3834 if (! 
full) 3835 HALT_UNALLOC; 3836 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0) 3837 + aarch64_get_vec_u64 (cpu, vm, 0)); 3838 aarch64_set_vec_u64 (cpu, vd, 1, 3839 aarch64_get_vec_u64 (cpu, vn, 1) 3840 + aarch64_get_vec_u64 (cpu, vm, 1)); 3841 return; 3842 } 3843 } 3844 3845 static void 3846 do_vec_mul (sim_cpu *cpu) 3847 { 3848 /* instr[31] = 0 3849 instr[30] = full/half selector 3850 instr[29,24] = 00 1110 3851 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit 3852 instr[21] = 1 3853 instr[20,16] = Vn 3854 instr[15,10] = 10 0111 3855 instr[9,5] = Vm 3856 instr[4.0] = Vd. */ 3857 3858 unsigned vm = INSTR (20, 16); 3859 unsigned vn = INSTR (9, 5); 3860 unsigned vd = INSTR (4, 0); 3861 unsigned i; 3862 int full = INSTR (30, 30); 3863 int bias = 0; 3864 3865 NYI_assert (29, 24, 0x0E); 3866 NYI_assert (21, 21, 1); 3867 NYI_assert (15, 10, 0x27); 3868 3869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3870 switch (INSTR (23, 22)) 3871 { 3872 case 0: 3873 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8); 3874 return; 3875 3876 case 1: 3877 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16); 3878 return; 3879 3880 case 2: 3881 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32); 3882 return; 3883 3884 case 3: 3885 HALT_UNALLOC; 3886 } 3887 } 3888 3889 static void 3890 do_vec_MLA (sim_cpu *cpu) 3891 { 3892 /* instr[31] = 0 3893 instr[30] = full/half selector 3894 instr[29,24] = 00 1110 3895 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit 3896 instr[21] = 1 3897 instr[20,16] = Vn 3898 instr[15,10] = 1001 01 3899 instr[9,5] = Vm 3900 instr[4.0] = Vd. */ 3901 3902 unsigned vm = INSTR (20, 16); 3903 unsigned vn = INSTR (9, 5); 3904 unsigned vd = INSTR (4, 0); 3905 unsigned i; 3906 int full = INSTR (30, 30); 3907 3908 NYI_assert (29, 24, 0x0E); 3909 NYI_assert (21, 21, 1); 3910 NYI_assert (15, 10, 0x25); 3911 3912 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3913 switch (INSTR (23, 22)) 3914 { 3915 case 0: 3916 for (i = 0; i < (full ? 16 : 8); i++) 3917 aarch64_set_vec_u8 (cpu, vd, i, 3918 aarch64_get_vec_u8 (cpu, vd, i) 3919 + (aarch64_get_vec_u8 (cpu, vn, i) 3920 * aarch64_get_vec_u8 (cpu, vm, i))); 3921 return; 3922 3923 case 1: 3924 for (i = 0; i < (full ? 8 : 4); i++) 3925 aarch64_set_vec_u16 (cpu, vd, i, 3926 aarch64_get_vec_u16 (cpu, vd, i) 3927 + (aarch64_get_vec_u16 (cpu, vn, i) 3928 * aarch64_get_vec_u16 (cpu, vm, i))); 3929 return; 3930 3931 case 2: 3932 for (i = 0; i < (full ? 4 : 2); i++) 3933 aarch64_set_vec_u32 (cpu, vd, i, 3934 aarch64_get_vec_u32 (cpu, vd, i) 3935 + (aarch64_get_vec_u32 (cpu, vn, i) 3936 * aarch64_get_vec_u32 (cpu, vm, i))); 3937 return; 3938 3939 default: 3940 HALT_UNALLOC; 3941 } 3942 } 3943 3944 static float 3945 fmaxnm (float a, float b) 3946 { 3947 if (! isnan (a)) 3948 { 3949 if (! isnan (b)) 3950 return a > b ? a : b; 3951 return a; 3952 } 3953 else if (! isnan (b)) 3954 return b; 3955 return a; 3956 } 3957 3958 static float 3959 fminnm (float a, float b) 3960 { 3961 if (! isnan (a)) 3962 { 3963 if (! isnan (b)) 3964 return a < b ? a : b; 3965 return a; 3966 } 3967 else if (! isnan (b)) 3968 return b; 3969 return a; 3970 } 3971 3972 static double 3973 dmaxnm (double a, double b) 3974 { 3975 if (! isnan (a)) 3976 { 3977 if (! isnan (b)) 3978 return a > b ? a : b; 3979 return a; 3980 } 3981 else if (! isnan (b)) 3982 return b; 3983 return a; 3984 } 3985 3986 static double 3987 dminnm (double a, double b) 3988 { 3989 if (! isnan (a)) 3990 { 3991 if (! isnan (b)) 3992 return a < b ? 
a : b;
      return a;
    }
  else if (! isnan (b))
    return b;
  return a;
}

static void
do_vec_FminmaxNMP (sim_cpu *cpu)
{
  /* instr [31]    = 0
     instr [30]    = half (0)/full (1)
     instr [29,24] = 10 1110
     instr [23]    = max(0)/min(1)
     instr [22]    = float (0)/double (1)
     instr [21]    = 1
     instr [20,16] = Vn
     instr [15,10] = 1100 01
     instr [9,5]   = Vm
     instr [4,0]   = Vd.  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  int full = INSTR (30, 30);

  NYI_assert (29, 24, 0x2E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x31);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      double (* fn)(double, double) = INSTR (23, 23)
        ? dminnm : dmaxnm;

      if (! full)
        HALT_NYI;
      /* The first result element comes from the Vn pair, the second
         from the Vm pair.  */
      aarch64_set_vec_double (cpu, vd, 0,
                              fn (aarch64_get_vec_double (cpu, vn, 0),
                                  aarch64_get_vec_double (cpu, vn, 1)));
      aarch64_set_vec_double (cpu, vd, 1,
                              fn (aarch64_get_vec_double (cpu, vm, 0),
                                  aarch64_get_vec_double (cpu, vm, 1)));
    }
  else
    {
      float (* fn)(float, float) = INSTR (23, 23)
        ? fminnm : fmaxnm;

      aarch64_set_vec_float (cpu, vd, 0,
                             fn (aarch64_get_vec_float (cpu, vn, 0),
                                 aarch64_get_vec_float (cpu, vn, 1)));
      if (full)
        aarch64_set_vec_float (cpu, vd, 1,
                               fn (aarch64_get_vec_float (cpu, vn, 2),
                                   aarch64_get_vec_float (cpu, vn, 3)));

      aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
                             fn (aarch64_get_vec_float (cpu, vm, 0),
                                 aarch64_get_vec_float (cpu, vm, 1)));
      if (full)
        aarch64_set_vec_float (cpu, vd, 3,
                               fn (aarch64_get_vec_float (cpu, vm, 2),
                                   aarch64_get_vec_float (cpu, vm, 3)));
    }
}

static void
do_vec_AND (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = half (0)/full (1)
     instr[29,21] = 001110001
     instr[20,16] = Vm
     instr[15,10] = 000111
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned i;
  int full = INSTR (30, 30);

  NYI_assert (29, 21, 0x071);
  NYI_assert (15, 10, 0x07);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 4 : 2); i++)
    aarch64_set_vec_u32 (cpu, vd, i,
                         aarch64_get_vec_u32 (cpu, vn, i)
                         & aarch64_get_vec_u32 (cpu, vm, i));
}

static void
do_vec_BSL (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = half (0)/full (1)
     instr[29,21] = 101110011
     instr[20,16] = Vm
     instr[15,10] = 000111
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned i;
  int full = INSTR (30, 30);

  NYI_assert (29, 21, 0x173);
  NYI_assert (15, 10, 0x07);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 16 : 8); i++)
    aarch64_set_vec_u8 (cpu, vd, i,
                        (    aarch64_get_vec_u8 (cpu, vd, i)
                           & aarch64_get_vec_u8 (cpu, vn, i))
                        | ((~ aarch64_get_vec_u8 (cpu, vd, i))
                           & aarch64_get_vec_u8 (cpu, vm, i)));
}

static void
do_vec_EOR (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = half (0)/full (1)
     instr[29,21] = 10 1110 001
     instr[20,16] = Vm
     instr[15,10] = 000111
     instr[9,5]   = Vn
     instr[4,0]   = Vd.
*/ 4127 4128 unsigned vm = INSTR (20, 16); 4129 unsigned vn = INSTR (9, 5); 4130 unsigned vd = INSTR (4, 0); 4131 unsigned i; 4132 int full = INSTR (30, 30); 4133 4134 NYI_assert (29, 21, 0x171); 4135 NYI_assert (15, 10, 0x07); 4136 4137 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4138 for (i = 0; i < (full ? 4 : 2); i++) 4139 aarch64_set_vec_u32 (cpu, vd, i, 4140 aarch64_get_vec_u32 (cpu, vn, i) 4141 ^ aarch64_get_vec_u32 (cpu, vm, i)); 4142 } 4143 4144 static void 4145 do_vec_bit (sim_cpu *cpu) 4146 { 4147 /* instr[31] = 0 4148 instr[30] = half (0)/full (1) 4149 instr[29,23] = 10 1110 1 4150 instr[22] = BIT (0) / BIF (1) 4151 instr[21] = 1 4152 instr[20,16] = Vm 4153 instr[15,10] = 0001 11 4154 instr[9,5] = Vn 4155 instr[4.0] = Vd. */ 4156 4157 unsigned vm = INSTR (20, 16); 4158 unsigned vn = INSTR (9, 5); 4159 unsigned vd = INSTR (4, 0); 4160 unsigned full = INSTR (30, 30); 4161 unsigned test_false = INSTR (22, 22); 4162 unsigned i; 4163 4164 NYI_assert (29, 23, 0x5D); 4165 NYI_assert (21, 21, 1); 4166 NYI_assert (15, 10, 0x07); 4167 4168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4169 for (i = 0; i < (full ? 4 : 2); i++) 4170 { 4171 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i); 4172 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i); 4173 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i); 4174 if (test_false) 4175 aarch64_set_vec_u32 (cpu, vd, i, 4176 (vd_val & vm_val) | (vn_val & ~vm_val)); 4177 else 4178 aarch64_set_vec_u32 (cpu, vd, i, 4179 (vd_val & ~vm_val) | (vn_val & vm_val)); 4180 } 4181 } 4182 4183 static void 4184 do_vec_ORN (sim_cpu *cpu) 4185 { 4186 /* instr[31] = 0 4187 instr[30] = half (0)/full (1) 4188 instr[29,21] = 00 1110 111 4189 instr[20,16] = Vm 4190 instr[15,10] = 00 0111 4191 instr[9,5] = Vn 4192 instr[4.0] = Vd. */ 4193 4194 unsigned vm = INSTR (20, 16); 4195 unsigned vn = INSTR (9, 5); 4196 unsigned vd = INSTR (4, 0); 4197 unsigned i; 4198 int full = INSTR (30, 30); 4199 4200 NYI_assert (29, 21, 0x077); 4201 NYI_assert (15, 10, 0x07); 4202 4203 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4204 for (i = 0; i < (full ? 16 : 8); i++) 4205 aarch64_set_vec_u8 (cpu, vd, i, 4206 aarch64_get_vec_u8 (cpu, vn, i) 4207 | ~ aarch64_get_vec_u8 (cpu, vm, i)); 4208 } 4209 4210 static void 4211 do_vec_ORR (sim_cpu *cpu) 4212 { 4213 /* instr[31] = 0 4214 instr[30] = half (0)/full (1) 4215 instr[29,21] = 00 1110 101 4216 instr[20,16] = Vm 4217 instr[15,10] = 0001 11 4218 instr[9,5] = Vn 4219 instr[4.0] = Vd. */ 4220 4221 unsigned vm = INSTR (20, 16); 4222 unsigned vn = INSTR (9, 5); 4223 unsigned vd = INSTR (4, 0); 4224 unsigned i; 4225 int full = INSTR (30, 30); 4226 4227 NYI_assert (29, 21, 0x075); 4228 NYI_assert (15, 10, 0x07); 4229 4230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4231 for (i = 0; i < (full ? 16 : 8); i++) 4232 aarch64_set_vec_u8 (cpu, vd, i, 4233 aarch64_get_vec_u8 (cpu, vn, i) 4234 | aarch64_get_vec_u8 (cpu, vm, i)); 4235 } 4236 4237 static void 4238 do_vec_BIC (sim_cpu *cpu) 4239 { 4240 /* instr[31] = 0 4241 instr[30] = half (0)/full (1) 4242 instr[29,21] = 00 1110 011 4243 instr[20,16] = Vm 4244 instr[15,10] = 00 0111 4245 instr[9,5] = Vn 4246 instr[4.0] = Vd. */ 4247 4248 unsigned vm = INSTR (20, 16); 4249 unsigned vn = INSTR (9, 5); 4250 unsigned vd = INSTR (4, 0); 4251 unsigned i; 4252 int full = INSTR (30, 30); 4253 4254 NYI_assert (29, 21, 0x073); 4255 NYI_assert (15, 10, 0x07); 4256 4257 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4258 for (i = 0; i < (full ? 
16 : 8); i++) 4259 aarch64_set_vec_u8 (cpu, vd, i, 4260 aarch64_get_vec_u8 (cpu, vn, i) 4261 & ~ aarch64_get_vec_u8 (cpu, vm, i)); 4262 } 4263 4264 static void 4265 do_vec_XTN (sim_cpu *cpu) 4266 { 4267 /* instr[31] = 0 4268 instr[30] = first part (0)/ second part (1) 4269 instr[29,24] = 00 1110 4270 instr[23,22] = size: byte(00), half(01), word (10) 4271 instr[21,10] = 1000 0100 1010 4272 instr[9,5] = Vs 4273 instr[4,0] = Vd. */ 4274 4275 unsigned vs = INSTR (9, 5); 4276 unsigned vd = INSTR (4, 0); 4277 unsigned bias = INSTR (30, 30); 4278 unsigned i; 4279 4280 NYI_assert (29, 24, 0x0E); 4281 NYI_assert (21, 10, 0x84A); 4282 4283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4284 switch (INSTR (23, 22)) 4285 { 4286 case 0: 4287 for (i = 0; i < 8; i++) 4288 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8), 4289 aarch64_get_vec_u16 (cpu, vs, i)); 4290 return; 4291 4292 case 1: 4293 for (i = 0; i < 4; i++) 4294 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4), 4295 aarch64_get_vec_u32 (cpu, vs, i)); 4296 return; 4297 4298 case 2: 4299 for (i = 0; i < 2; i++) 4300 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2), 4301 aarch64_get_vec_u64 (cpu, vs, i)); 4302 return; 4303 } 4304 } 4305 4306 /* Return the number of bits set in the input value. */ 4307 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) 4308 # define popcount __builtin_popcount 4309 #else 4310 static int 4311 popcount (unsigned char x) 4312 { 4313 static const unsigned char popcnt[16] = 4314 { 4315 0, 1, 1, 2, 4316 1, 2, 2, 3, 4317 1, 2, 2, 3, 4318 2, 3, 3, 4 4319 }; 4320 4321 /* Only counts the low 8 bits of the input as that is all we need. */ 4322 return popcnt[x % 16] + popcnt[x / 16]; 4323 } 4324 #endif 4325 4326 static void 4327 do_vec_CNT (sim_cpu *cpu) 4328 { 4329 /* instr[31] = 0 4330 instr[30] = half (0)/ full (1) 4331 instr[29,24] = 00 1110 4332 instr[23,22] = size: byte(00) 4333 instr[21,10] = 1000 0001 0110 4334 instr[9,5] = Vs 4335 instr[4,0] = Vd. */ 4336 4337 unsigned vs = INSTR (9, 5); 4338 unsigned vd = INSTR (4, 0); 4339 int full = INSTR (30, 30); 4340 int size = INSTR (23, 22); 4341 int i; 4342 4343 NYI_assert (29, 24, 0x0E); 4344 NYI_assert (21, 10, 0x816); 4345 4346 if (size != 0) 4347 HALT_UNALLOC; 4348 4349 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4350 4351 for (i = 0; i < (full ? 16 : 8); i++) 4352 aarch64_set_vec_u8 (cpu, vd, i, 4353 popcount (aarch64_get_vec_u8 (cpu, vs, i))); 4354 } 4355 4356 static void 4357 do_vec_maxv (sim_cpu *cpu) 4358 { 4359 /* instr[31] = 0 4360 instr[30] = half(0)/full(1) 4361 instr[29] = signed (0)/unsigned(1) 4362 instr[28,24] = 0 1110 4363 instr[23,22] = size: byte(00), half(01), word (10) 4364 instr[21] = 1 4365 instr[20,17] = 1 000 4366 instr[16] = max(0)/min(1) 4367 instr[15,10] = 1010 10 4368 instr[9,5] = V source 4369 instr[4.0] = R dest. */ 4370 4371 unsigned vs = INSTR (9, 5); 4372 unsigned rd = INSTR (4, 0); 4373 unsigned full = INSTR (30, 30); 4374 unsigned i; 4375 4376 NYI_assert (28, 24, 0x0E); 4377 NYI_assert (21, 21, 1); 4378 NYI_assert (20, 17, 8); 4379 NYI_assert (15, 10, 0x2A); 4380 4381 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4382 switch ((INSTR (29, 29) << 1) | INSTR (16, 16)) 4383 { 4384 case 0: /* SMAXV. */ 4385 { 4386 int64_t smax; 4387 switch (INSTR (23, 22)) 4388 { 4389 case 0: 4390 smax = aarch64_get_vec_s8 (cpu, vs, 0); 4391 for (i = 1; i < (full ? 16 : 8); i++) 4392 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i)); 4393 break; 4394 case 1: 4395 smax = aarch64_get_vec_s16 (cpu, vs, 0); 4396 for (i = 1; i < (full ? 
8 : 4); i++) 4397 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i)); 4398 break; 4399 case 2: 4400 smax = aarch64_get_vec_s32 (cpu, vs, 0); 4401 for (i = 1; i < (full ? 4 : 2); i++) 4402 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i)); 4403 break; 4404 case 3: 4405 HALT_UNALLOC; 4406 } 4407 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax); 4408 return; 4409 } 4410 4411 case 1: /* SMINV. */ 4412 { 4413 int64_t smin; 4414 switch (INSTR (23, 22)) 4415 { 4416 case 0: 4417 smin = aarch64_get_vec_s8 (cpu, vs, 0); 4418 for (i = 1; i < (full ? 16 : 8); i++) 4419 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i)); 4420 break; 4421 case 1: 4422 smin = aarch64_get_vec_s16 (cpu, vs, 0); 4423 for (i = 1; i < (full ? 8 : 4); i++) 4424 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i)); 4425 break; 4426 case 2: 4427 smin = aarch64_get_vec_s32 (cpu, vs, 0); 4428 for (i = 1; i < (full ? 4 : 2); i++) 4429 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i)); 4430 break; 4431 4432 case 3: 4433 HALT_UNALLOC; 4434 } 4435 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin); 4436 return; 4437 } 4438 4439 case 2: /* UMAXV. */ 4440 { 4441 uint64_t umax; 4442 switch (INSTR (23, 22)) 4443 { 4444 case 0: 4445 umax = aarch64_get_vec_u8 (cpu, vs, 0); 4446 for (i = 1; i < (full ? 16 : 8); i++) 4447 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i)); 4448 break; 4449 case 1: 4450 umax = aarch64_get_vec_u16 (cpu, vs, 0); 4451 for (i = 1; i < (full ? 8 : 4); i++) 4452 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i)); 4453 break; 4454 case 2: 4455 umax = aarch64_get_vec_u32 (cpu, vs, 0); 4456 for (i = 1; i < (full ? 4 : 2); i++) 4457 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i)); 4458 break; 4459 4460 case 3: 4461 HALT_UNALLOC; 4462 } 4463 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax); 4464 return; 4465 } 4466 4467 case 3: /* UMINV. */ 4468 { 4469 uint64_t umin; 4470 switch (INSTR (23, 22)) 4471 { 4472 case 0: 4473 umin = aarch64_get_vec_u8 (cpu, vs, 0); 4474 for (i = 1; i < (full ? 16 : 8); i++) 4475 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i)); 4476 break; 4477 case 1: 4478 umin = aarch64_get_vec_u16 (cpu, vs, 0); 4479 for (i = 1; i < (full ? 8 : 4); i++) 4480 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i)); 4481 break; 4482 case 2: 4483 umin = aarch64_get_vec_u32 (cpu, vs, 0); 4484 for (i = 1; i < (full ? 4 : 2); i++) 4485 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i)); 4486 break; 4487 4488 case 3: 4489 HALT_UNALLOC; 4490 } 4491 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin); 4492 return; 4493 } 4494 } 4495 } 4496 4497 static void 4498 do_vec_fminmaxV (sim_cpu *cpu) 4499 { 4500 /* instr[31,24] = 0110 1110 4501 instr[23] = max(0)/min(1) 4502 instr[22,14] = 011 0000 11 4503 instr[13,12] = nm(00)/normal(11) 4504 instr[11,10] = 10 4505 instr[9,5] = V source 4506 instr[4.0] = R dest. */ 4507 4508 unsigned vs = INSTR (9, 5); 4509 unsigned rd = INSTR (4, 0); 4510 unsigned i; 4511 float res = aarch64_get_vec_float (cpu, vs, 0); 4512 4513 NYI_assert (31, 24, 0x6E); 4514 NYI_assert (22, 14, 0x0C3); 4515 NYI_assert (11, 10, 2); 4516 4517 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4518 if (INSTR (23, 23)) 4519 { 4520 switch (INSTR (13, 12)) 4521 { 4522 case 0: /* FMNINNMV. */ 4523 for (i = 1; i < 4; i++) 4524 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i)); 4525 break; 4526 4527 case 3: /* FMINV. 
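Unlike the NM variant above, this uses the generic min macro, so a NaN operand follows the C comparison result of that macro rather than the fminnm rules.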
*/ 4528 for (i = 1; i < 4; i++) 4529 res = min (res, aarch64_get_vec_float (cpu, vs, i)); 4530 break; 4531 4532 default: 4533 HALT_NYI; 4534 } 4535 } 4536 else 4537 { 4538 switch (INSTR (13, 12)) 4539 { 4540 case 0: /* FMNAXNMV. */ 4541 for (i = 1; i < 4; i++) 4542 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i)); 4543 break; 4544 4545 case 3: /* FMAXV. */ 4546 for (i = 1; i < 4; i++) 4547 res = max (res, aarch64_get_vec_float (cpu, vs, i)); 4548 break; 4549 4550 default: 4551 HALT_NYI; 4552 } 4553 } 4554 4555 aarch64_set_FP_float (cpu, rd, res); 4556 } 4557 4558 static void 4559 do_vec_Fminmax (sim_cpu *cpu) 4560 { 4561 /* instr[31] = 0 4562 instr[30] = half(0)/full(1) 4563 instr[29,24] = 00 1110 4564 instr[23] = max(0)/min(1) 4565 instr[22] = float(0)/double(1) 4566 instr[21] = 1 4567 instr[20,16] = Vm 4568 instr[15,14] = 11 4569 instr[13,12] = nm(00)/normal(11) 4570 instr[11,10] = 01 4571 instr[9,5] = Vn 4572 instr[4,0] = Vd. */ 4573 4574 unsigned vm = INSTR (20, 16); 4575 unsigned vn = INSTR (9, 5); 4576 unsigned vd = INSTR (4, 0); 4577 unsigned full = INSTR (30, 30); 4578 unsigned min = INSTR (23, 23); 4579 unsigned i; 4580 4581 NYI_assert (29, 24, 0x0E); 4582 NYI_assert (21, 21, 1); 4583 NYI_assert (15, 14, 3); 4584 NYI_assert (11, 10, 1); 4585 4586 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4587 if (INSTR (22, 22)) 4588 { 4589 double (* func)(double, double); 4590 4591 if (! full) 4592 HALT_NYI; 4593 4594 if (INSTR (13, 12) == 0) 4595 func = min ? dminnm : dmaxnm; 4596 else if (INSTR (13, 12) == 3) 4597 func = min ? fmin : fmax; 4598 else 4599 HALT_NYI; 4600 4601 for (i = 0; i < 2; i++) 4602 aarch64_set_vec_double (cpu, vd, i, 4603 func (aarch64_get_vec_double (cpu, vn, i), 4604 aarch64_get_vec_double (cpu, vm, i))); 4605 } 4606 else 4607 { 4608 float (* func)(float, float); 4609 4610 if (INSTR (13, 12) == 0) 4611 func = min ? fminnm : fmaxnm; 4612 else if (INSTR (13, 12) == 3) 4613 func = min ? fminf : fmaxf; 4614 else 4615 HALT_NYI; 4616 4617 for (i = 0; i < (full ? 4 : 2); i++) 4618 aarch64_set_vec_float (cpu, vd, i, 4619 func (aarch64_get_vec_float (cpu, vn, i), 4620 aarch64_get_vec_float (cpu, vm, i))); 4621 } 4622 } 4623 4624 static void 4625 do_vec_SCVTF (sim_cpu *cpu) 4626 { 4627 /* instr[31] = 0 4628 instr[30] = Q 4629 instr[29,23] = 00 1110 0 4630 instr[22] = float(0)/double(1) 4631 instr[21,10] = 10 0001 1101 10 4632 instr[9,5] = Vn 4633 instr[4,0] = Vd. */ 4634 4635 unsigned vn = INSTR (9, 5); 4636 unsigned vd = INSTR (4, 0); 4637 unsigned full = INSTR (30, 30); 4638 unsigned size = INSTR (22, 22); 4639 unsigned i; 4640 4641 NYI_assert (29, 23, 0x1C); 4642 NYI_assert (21, 10, 0x876); 4643 4644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4645 if (size) 4646 { 4647 if (! full) 4648 HALT_UNALLOC; 4649 4650 for (i = 0; i < 2; i++) 4651 { 4652 double val = (double) aarch64_get_vec_u64 (cpu, vn, i); 4653 aarch64_set_vec_double (cpu, vd, i, val); 4654 } 4655 } 4656 else 4657 { 4658 for (i = 0; i < (full ? 4 : 2); i++) 4659 { 4660 float val = (float) aarch64_get_vec_u32 (cpu, vn, i); 4661 aarch64_set_vec_float (cpu, vd, i, val); 4662 } 4663 } 4664 } 4665 4666 #define VEC_CMP(SOURCE, CMP) \ 4667 do \ 4668 { \ 4669 switch (size) \ 4670 { \ 4671 case 0: \ 4672 for (i = 0; i < (full ? 16 : 8); i++) \ 4673 aarch64_set_vec_u8 (cpu, vd, i, \ 4674 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \ 4675 CMP \ 4676 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \ 4677 ? -1 : 0); \ 4678 return; \ 4679 case 1: \ 4680 for (i = 0; i < (full ? 
8 : 4); i++) \ 4681 aarch64_set_vec_u16 (cpu, vd, i, \ 4682 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \ 4683 CMP \ 4684 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \ 4685 ? -1 : 0); \ 4686 return; \ 4687 case 2: \ 4688 for (i = 0; i < (full ? 4 : 2); i++) \ 4689 aarch64_set_vec_u32 (cpu, vd, i, \ 4690 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \ 4691 CMP \ 4692 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \ 4693 ? -1 : 0); \ 4694 return; \ 4695 case 3: \ 4696 if (! full) \ 4697 HALT_UNALLOC; \ 4698 for (i = 0; i < 2; i++) \ 4699 aarch64_set_vec_u64 (cpu, vd, i, \ 4700 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \ 4701 CMP \ 4702 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \ 4703 ? -1ULL : 0); \ 4704 return; \ 4705 } \ 4706 } \ 4707 while (0) 4708 4709 #define VEC_CMP0(SOURCE, CMP) \ 4710 do \ 4711 { \ 4712 switch (size) \ 4713 { \ 4714 case 0: \ 4715 for (i = 0; i < (full ? 16 : 8); i++) \ 4716 aarch64_set_vec_u8 (cpu, vd, i, \ 4717 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \ 4718 CMP 0 ? -1 : 0); \ 4719 return; \ 4720 case 1: \ 4721 for (i = 0; i < (full ? 8 : 4); i++) \ 4722 aarch64_set_vec_u16 (cpu, vd, i, \ 4723 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \ 4724 CMP 0 ? -1 : 0); \ 4725 return; \ 4726 case 2: \ 4727 for (i = 0; i < (full ? 4 : 2); i++) \ 4728 aarch64_set_vec_u32 (cpu, vd, i, \ 4729 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \ 4730 CMP 0 ? -1 : 0); \ 4731 return; \ 4732 case 3: \ 4733 if (! full) \ 4734 HALT_UNALLOC; \ 4735 for (i = 0; i < 2; i++) \ 4736 aarch64_set_vec_u64 (cpu, vd, i, \ 4737 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \ 4738 CMP 0 ? -1ULL : 0); \ 4739 return; \ 4740 } \ 4741 } \ 4742 while (0) 4743 4744 #define VEC_FCMP0(CMP) \ 4745 do \ 4746 { \ 4747 if (vm != 0) \ 4748 HALT_NYI; \ 4749 if (INSTR (22, 22)) \ 4750 { \ 4751 if (! full) \ 4752 HALT_NYI; \ 4753 for (i = 0; i < 2; i++) \ 4754 aarch64_set_vec_u64 (cpu, vd, i, \ 4755 aarch64_get_vec_double (cpu, vn, i) \ 4756 CMP 0.0 ? -1 : 0); \ 4757 } \ 4758 else \ 4759 { \ 4760 for (i = 0; i < (full ? 4 : 2); i++) \ 4761 aarch64_set_vec_u32 (cpu, vd, i, \ 4762 aarch64_get_vec_float (cpu, vn, i) \ 4763 CMP 0.0 ? -1 : 0); \ 4764 } \ 4765 return; \ 4766 } \ 4767 while (0) 4768 4769 #define VEC_FCMP(CMP) \ 4770 do \ 4771 { \ 4772 if (INSTR (22, 22)) \ 4773 { \ 4774 if (! full) \ 4775 HALT_NYI; \ 4776 for (i = 0; i < 2; i++) \ 4777 aarch64_set_vec_u64 (cpu, vd, i, \ 4778 aarch64_get_vec_double (cpu, vn, i) \ 4779 CMP \ 4780 aarch64_get_vec_double (cpu, vm, i) \ 4781 ? -1 : 0); \ 4782 } \ 4783 else \ 4784 { \ 4785 for (i = 0; i < (full ? 4 : 2); i++) \ 4786 aarch64_set_vec_u32 (cpu, vd, i, \ 4787 aarch64_get_vec_float (cpu, vn, i) \ 4788 CMP \ 4789 aarch64_get_vec_float (cpu, vm, i) \ 4790 ? -1 : 0); \ 4791 } \ 4792 return; \ 4793 } \ 4794 while (0) 4795 4796 static void 4797 do_vec_compare (sim_cpu *cpu) 4798 { 4799 /* instr[31] = 0 4800 instr[30] = half(0)/full(1) 4801 instr[29] = part-of-comparison-type 4802 instr[28,24] = 0 1110 4803 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11) 4804 type of float compares: single (-0) / double (-1) 4805 instr[21] = 1 4806 instr[20,16] = Vm or 00000 (compare vs 0) 4807 instr[15,10] = part-of-comparison-type 4808 instr[9,5] = Vn 4809 instr[4.0] = Vd. 
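A zero Vm field together with the opcode bits tested below marks the compare-against-zero forms; encodings in this space that do not decode as compares are re-dispatched to the max/min and convert helpers.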
*/ 4810 4811 int full = INSTR (30, 30); 4812 int size = INSTR (23, 22); 4813 unsigned vm = INSTR (20, 16); 4814 unsigned vn = INSTR (9, 5); 4815 unsigned vd = INSTR (4, 0); 4816 unsigned i; 4817 4818 NYI_assert (28, 24, 0x0E); 4819 NYI_assert (21, 21, 1); 4820 4821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4822 if ((INSTR (11, 11) 4823 && INSTR (14, 14)) 4824 || ((INSTR (11, 11) == 0 4825 && INSTR (10, 10) == 0))) 4826 { 4827 /* A compare vs 0. */ 4828 if (vm != 0) 4829 { 4830 if (INSTR (15, 10) == 0x2A) 4831 do_vec_maxv (cpu); 4832 else if (INSTR (15, 10) == 0x32 4833 || INSTR (15, 10) == 0x3E) 4834 do_vec_fminmaxV (cpu); 4835 else if (INSTR (29, 23) == 0x1C 4836 && INSTR (21, 10) == 0x876) 4837 do_vec_SCVTF (cpu); 4838 else 4839 HALT_NYI; 4840 return; 4841 } 4842 } 4843 4844 if (INSTR (14, 14)) 4845 { 4846 /* A floating point compare. */ 4847 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4) 4848 | INSTR (13, 10); 4849 4850 NYI_assert (15, 15, 1); 4851 4852 switch (decode) 4853 { 4854 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>); 4855 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=); 4856 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==); 4857 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=); 4858 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<); 4859 case /* 0b111001: GT */ 0x39: VEC_FCMP (>); 4860 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=); 4861 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==); 4862 4863 default: 4864 HALT_NYI; 4865 } 4866 } 4867 else 4868 { 4869 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10); 4870 4871 switch (decode) 4872 { 4873 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > ); 4874 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= ); 4875 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > ); 4876 case 0x23: /* 0100011 TST */ VEC_CMP (u, & ); 4877 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == ); 4878 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < ); 4879 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > ); 4880 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= ); 4881 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= ); 4882 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == ); 4883 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= ); 4884 default: 4885 if (vm == 0) 4886 HALT_NYI; 4887 do_vec_maxv (cpu); 4888 } 4889 } 4890 } 4891 4892 static void 4893 do_vec_SSHL (sim_cpu *cpu) 4894 { 4895 /* instr[31] = 0 4896 instr[30] = first part (0)/ second part (1) 4897 instr[29,24] = 00 1110 4898 instr[23,22] = size: byte(00), half(01), word (10), long (11) 4899 instr[21] = 1 4900 instr[20,16] = Vm 4901 instr[15,10] = 0100 01 4902 instr[9,5] = Vn 4903 instr[4,0] = Vd. */ 4904 4905 unsigned full = INSTR (30, 30); 4906 unsigned vm = INSTR (20, 16); 4907 unsigned vn = INSTR (9, 5); 4908 unsigned vd = INSTR (4, 0); 4909 unsigned i; 4910 signed int shift; 4911 4912 NYI_assert (29, 24, 0x0E); 4913 NYI_assert (21, 21, 1); 4914 NYI_assert (15, 10, 0x11); 4915 4916 /* FIXME: What is a signed shift left in this context ?. */ 4917 4918 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4919 switch (INSTR (23, 22)) 4920 { 4921 case 0: 4922 for (i = 0; i < (full ? 16 : 8); i++) 4923 { 4924 shift = aarch64_get_vec_s8 (cpu, vm, i); 4925 if (shift >= 0) 4926 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) 4927 << shift); 4928 else 4929 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) 4930 >> - shift); 4931 } 4932 return; 4933 4934 case 1: 4935 for (i = 0; i < (full ? 
8 : 4); i++) 4936 { 4937 shift = aarch64_get_vec_s8 (cpu, vm, i * 2); 4938 if (shift >= 0) 4939 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) 4940 << shift); 4941 else 4942 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) 4943 >> - shift); 4944 } 4945 return; 4946 4947 case 2: 4948 for (i = 0; i < (full ? 4 : 2); i++) 4949 { 4950 shift = aarch64_get_vec_s8 (cpu, vm, i * 4); 4951 if (shift >= 0) 4952 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) 4953 << shift); 4954 else 4955 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) 4956 >> - shift); 4957 } 4958 return; 4959 4960 case 3: 4961 if (! full) 4962 HALT_UNALLOC; 4963 for (i = 0; i < 2; i++) 4964 { 4965 shift = aarch64_get_vec_s8 (cpu, vm, i * 8); 4966 if (shift >= 0) 4967 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) 4968 << shift); 4969 else 4970 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) 4971 >> - shift); 4972 } 4973 return; 4974 } 4975 } 4976 4977 static void 4978 do_vec_USHL (sim_cpu *cpu) 4979 { 4980 /* instr[31] = 0 4981 instr[30] = first part (0)/ second part (1) 4982 instr[29,24] = 10 1110 4983 instr[23,22] = size: byte(00), half(01), word (10), long (11) 4984 instr[21] = 1 4985 instr[20,16] = Vm 4986 instr[15,10] = 0100 01 4987 instr[9,5] = Vn 4988 instr[4,0] = Vd */ 4989 4990 unsigned full = INSTR (30, 30); 4991 unsigned vm = INSTR (20, 16); 4992 unsigned vn = INSTR (9, 5); 4993 unsigned vd = INSTR (4, 0); 4994 unsigned i; 4995 signed int shift; 4996 4997 NYI_assert (29, 24, 0x2E); 4998 NYI_assert (15, 10, 0x11); 4999 5000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5001 switch (INSTR (23, 22)) 5002 { 5003 case 0: 5004 for (i = 0; i < (full ? 16 : 8); i++) 5005 { 5006 shift = aarch64_get_vec_s8 (cpu, vm, i); 5007 if (shift >= 0) 5008 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) 5009 << shift); 5010 else 5011 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) 5012 >> - shift); 5013 } 5014 return; 5015 5016 case 1: 5017 for (i = 0; i < (full ? 8 : 4); i++) 5018 { 5019 shift = aarch64_get_vec_s8 (cpu, vm, i * 2); 5020 if (shift >= 0) 5021 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) 5022 << shift); 5023 else 5024 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) 5025 >> - shift); 5026 } 5027 return; 5028 5029 case 2: 5030 for (i = 0; i < (full ? 4 : 2); i++) 5031 { 5032 shift = aarch64_get_vec_s8 (cpu, vm, i * 4); 5033 if (shift >= 0) 5034 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) 5035 << shift); 5036 else 5037 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) 5038 >> - shift); 5039 } 5040 return; 5041 5042 case 3: 5043 if (! full) 5044 HALT_UNALLOC; 5045 for (i = 0; i < 2; i++) 5046 { 5047 shift = aarch64_get_vec_s8 (cpu, vm, i * 8); 5048 if (shift >= 0) 5049 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) 5050 << shift); 5051 else 5052 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) 5053 >> - shift); 5054 } 5055 return; 5056 } 5057 } 5058 5059 static void 5060 do_vec_FMLA (sim_cpu *cpu) 5061 { 5062 /* instr[31] = 0 5063 instr[30] = full/half selector 5064 instr[29,23] = 0011100 5065 instr[22] = size: 0=>float, 1=>double 5066 instr[21] = 1 5067 instr[20,16] = Vn 5068 instr[15,10] = 1100 11 5069 instr[9,5] = Vm 5070 instr[4.0] = Vd. 
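Note: the emulation below computes vn * vm + vd with a separate C multiply and add, so it does not guarantee the single rounding of a true fused multiply-add.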
*/ 5071 5072 unsigned vm = INSTR (20, 16); 5073 unsigned vn = INSTR (9, 5); 5074 unsigned vd = INSTR (4, 0); 5075 unsigned i; 5076 int full = INSTR (30, 30); 5077 5078 NYI_assert (29, 23, 0x1C); 5079 NYI_assert (21, 21, 1); 5080 NYI_assert (15, 10, 0x33); 5081 5082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5083 if (INSTR (22, 22)) 5084 { 5085 if (! full) 5086 HALT_UNALLOC; 5087 for (i = 0; i < 2; i++) 5088 aarch64_set_vec_double (cpu, vd, i, 5089 aarch64_get_vec_double (cpu, vn, i) * 5090 aarch64_get_vec_double (cpu, vm, i) + 5091 aarch64_get_vec_double (cpu, vd, i)); 5092 } 5093 else 5094 { 5095 for (i = 0; i < (full ? 4 : 2); i++) 5096 aarch64_set_vec_float (cpu, vd, i, 5097 aarch64_get_vec_float (cpu, vn, i) * 5098 aarch64_get_vec_float (cpu, vm, i) + 5099 aarch64_get_vec_float (cpu, vd, i)); 5100 } 5101 } 5102 5103 static void 5104 do_vec_max (sim_cpu *cpu) 5105 { 5106 /* instr[31] = 0 5107 instr[30] = full/half selector 5108 instr[29] = SMAX (0) / UMAX (1) 5109 instr[28,24] = 0 1110 5110 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit 5111 instr[21] = 1 5112 instr[20,16] = Vn 5113 instr[15,10] = 0110 01 5114 instr[9,5] = Vm 5115 instr[4.0] = Vd. */ 5116 5117 unsigned vm = INSTR (20, 16); 5118 unsigned vn = INSTR (9, 5); 5119 unsigned vd = INSTR (4, 0); 5120 unsigned i; 5121 int full = INSTR (30, 30); 5122 5123 NYI_assert (28, 24, 0x0E); 5124 NYI_assert (21, 21, 1); 5125 NYI_assert (15, 10, 0x19); 5126 5127 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5128 if (INSTR (29, 29)) 5129 { 5130 switch (INSTR (23, 22)) 5131 { 5132 case 0: 5133 for (i = 0; i < (full ? 16 : 8); i++) 5134 aarch64_set_vec_u8 (cpu, vd, i, 5135 aarch64_get_vec_u8 (cpu, vn, i) 5136 > aarch64_get_vec_u8 (cpu, vm, i) 5137 ? aarch64_get_vec_u8 (cpu, vn, i) 5138 : aarch64_get_vec_u8 (cpu, vm, i)); 5139 return; 5140 5141 case 1: 5142 for (i = 0; i < (full ? 8 : 4); i++) 5143 aarch64_set_vec_u16 (cpu, vd, i, 5144 aarch64_get_vec_u16 (cpu, vn, i) 5145 > aarch64_get_vec_u16 (cpu, vm, i) 5146 ? aarch64_get_vec_u16 (cpu, vn, i) 5147 : aarch64_get_vec_u16 (cpu, vm, i)); 5148 return; 5149 5150 case 2: 5151 for (i = 0; i < (full ? 4 : 2); i++) 5152 aarch64_set_vec_u32 (cpu, vd, i, 5153 aarch64_get_vec_u32 (cpu, vn, i) 5154 > aarch64_get_vec_u32 (cpu, vm, i) 5155 ? aarch64_get_vec_u32 (cpu, vn, i) 5156 : aarch64_get_vec_u32 (cpu, vm, i)); 5157 return; 5158 5159 case 3: 5160 HALT_UNALLOC; 5161 } 5162 } 5163 else 5164 { 5165 switch (INSTR (23, 22)) 5166 { 5167 case 0: 5168 for (i = 0; i < (full ? 16 : 8); i++) 5169 aarch64_set_vec_s8 (cpu, vd, i, 5170 aarch64_get_vec_s8 (cpu, vn, i) 5171 > aarch64_get_vec_s8 (cpu, vm, i) 5172 ? aarch64_get_vec_s8 (cpu, vn, i) 5173 : aarch64_get_vec_s8 (cpu, vm, i)); 5174 return; 5175 5176 case 1: 5177 for (i = 0; i < (full ? 8 : 4); i++) 5178 aarch64_set_vec_s16 (cpu, vd, i, 5179 aarch64_get_vec_s16 (cpu, vn, i) 5180 > aarch64_get_vec_s16 (cpu, vm, i) 5181 ? aarch64_get_vec_s16 (cpu, vn, i) 5182 : aarch64_get_vec_s16 (cpu, vm, i)); 5183 return; 5184 5185 case 2: 5186 for (i = 0; i < (full ? 4 : 2); i++) 5187 aarch64_set_vec_s32 (cpu, vd, i, 5188 aarch64_get_vec_s32 (cpu, vn, i) 5189 > aarch64_get_vec_s32 (cpu, vm, i) 5190 ? 
aarch64_get_vec_s32 (cpu, vn, i) 5191 : aarch64_get_vec_s32 (cpu, vm, i)); 5192 return; 5193 5194 case 3: 5195 HALT_UNALLOC; 5196 } 5197 } 5198 } 5199 5200 static void 5201 do_vec_min (sim_cpu *cpu) 5202 { 5203 /* instr[31] = 0 5204 instr[30] = full/half selector 5205 instr[29] = SMIN (0) / UMIN (1) 5206 instr[28,24] = 0 1110 5207 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit 5208 instr[21] = 1 5209 instr[20,16] = Vn 5210 instr[15,10] = 0110 11 5211 instr[9,5] = Vm 5212 instr[4.0] = Vd. */ 5213 5214 unsigned vm = INSTR (20, 16); 5215 unsigned vn = INSTR (9, 5); 5216 unsigned vd = INSTR (4, 0); 5217 unsigned i; 5218 int full = INSTR (30, 30); 5219 5220 NYI_assert (28, 24, 0x0E); 5221 NYI_assert (21, 21, 1); 5222 NYI_assert (15, 10, 0x1B); 5223 5224 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5225 if (INSTR (29, 29)) 5226 { 5227 switch (INSTR (23, 22)) 5228 { 5229 case 0: 5230 for (i = 0; i < (full ? 16 : 8); i++) 5231 aarch64_set_vec_u8 (cpu, vd, i, 5232 aarch64_get_vec_u8 (cpu, vn, i) 5233 < aarch64_get_vec_u8 (cpu, vm, i) 5234 ? aarch64_get_vec_u8 (cpu, vn, i) 5235 : aarch64_get_vec_u8 (cpu, vm, i)); 5236 return; 5237 5238 case 1: 5239 for (i = 0; i < (full ? 8 : 4); i++) 5240 aarch64_set_vec_u16 (cpu, vd, i, 5241 aarch64_get_vec_u16 (cpu, vn, i) 5242 < aarch64_get_vec_u16 (cpu, vm, i) 5243 ? aarch64_get_vec_u16 (cpu, vn, i) 5244 : aarch64_get_vec_u16 (cpu, vm, i)); 5245 return; 5246 5247 case 2: 5248 for (i = 0; i < (full ? 4 : 2); i++) 5249 aarch64_set_vec_u32 (cpu, vd, i, 5250 aarch64_get_vec_u32 (cpu, vn, i) 5251 < aarch64_get_vec_u32 (cpu, vm, i) 5252 ? aarch64_get_vec_u32 (cpu, vn, i) 5253 : aarch64_get_vec_u32 (cpu, vm, i)); 5254 return; 5255 5256 case 3: 5257 HALT_UNALLOC; 5258 } 5259 } 5260 else 5261 { 5262 switch (INSTR (23, 22)) 5263 { 5264 case 0: 5265 for (i = 0; i < (full ? 16 : 8); i++) 5266 aarch64_set_vec_s8 (cpu, vd, i, 5267 aarch64_get_vec_s8 (cpu, vn, i) 5268 < aarch64_get_vec_s8 (cpu, vm, i) 5269 ? aarch64_get_vec_s8 (cpu, vn, i) 5270 : aarch64_get_vec_s8 (cpu, vm, i)); 5271 return; 5272 5273 case 1: 5274 for (i = 0; i < (full ? 8 : 4); i++) 5275 aarch64_set_vec_s16 (cpu, vd, i, 5276 aarch64_get_vec_s16 (cpu, vn, i) 5277 < aarch64_get_vec_s16 (cpu, vm, i) 5278 ? aarch64_get_vec_s16 (cpu, vn, i) 5279 : aarch64_get_vec_s16 (cpu, vm, i)); 5280 return; 5281 5282 case 2: 5283 for (i = 0; i < (full ? 4 : 2); i++) 5284 aarch64_set_vec_s32 (cpu, vd, i, 5285 aarch64_get_vec_s32 (cpu, vn, i) 5286 < aarch64_get_vec_s32 (cpu, vm, i) 5287 ? aarch64_get_vec_s32 (cpu, vn, i) 5288 : aarch64_get_vec_s32 (cpu, vm, i)); 5289 return; 5290 5291 case 3: 5292 HALT_UNALLOC; 5293 } 5294 } 5295 } 5296 5297 static void 5298 do_vec_sub_long (sim_cpu *cpu) 5299 { 5300 /* instr[31] = 0 5301 instr[30] = lower (0) / upper (1) 5302 instr[29] = signed (0) / unsigned (1) 5303 instr[28,24] = 0 1110 5304 instr[23,22] = size: bytes (00), half (01), word (10) 5305 instr[21] = 1 5306 insrt[20,16] = Vm 5307 instr[15,10] = 0010 00 5308 instr[9,5] = Vn 5309 instr[4,0] = V dest. */ 5310 5311 unsigned size = INSTR (23, 22); 5312 unsigned vm = INSTR (20, 16); 5313 unsigned vn = INSTR (9, 5); 5314 unsigned vd = INSTR (4, 0); 5315 unsigned bias = 0; 5316 unsigned i; 5317 5318 NYI_assert (28, 24, 0x0E); 5319 NYI_assert (21, 21, 1); 5320 NYI_assert (15, 10, 0x08); 5321 5322 if (size == 3) 5323 HALT_UNALLOC; 5324 5325 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5326 switch (INSTR (30, 29)) 5327 { 5328 case 2: /* SSUBL2. */ 5329 bias = 2; 5330 case 0: /* SSUBL. 
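For the *2 variants, bias (set above) selects the upper half of each source vector; it is scaled below to the index of the first source lane: 8 for bytes, 4 for halves, 2 for words. E.g. SSUBL2 Vd.8H, Vn.16B, Vm.16B reads byte lanes 8-15.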
*/ 5331 switch (size) 5332 { 5333 case 0: 5334 bias *= 4; 5335 for (i = 0; i < 8; i++) 5336 aarch64_set_vec_s16 (cpu, vd, i, 5337 aarch64_get_vec_s8 (cpu, vn, i + bias) 5338 - aarch64_get_vec_s8 (cpu, vm, i + bias)); 5339 break; 5340 5341 case 1: 5342 bias *= 2; 5343 for (i = 0; i < 4; i++) 5344 aarch64_set_vec_s32 (cpu, vd, i, 5345 aarch64_get_vec_s16 (cpu, vn, i + bias) 5346 - aarch64_get_vec_s16 (cpu, vm, i + bias)); 5347 break; 5348 5349 case 2: 5350 for (i = 0; i < 2; i++) 5351 aarch64_set_vec_s64 (cpu, vd, i, 5352 aarch64_get_vec_s32 (cpu, vn, i + bias) 5353 - aarch64_get_vec_s32 (cpu, vm, i + bias)); 5354 break; 5355 5356 default: 5357 HALT_UNALLOC; 5358 } 5359 break; 5360 5361 case 3: /* USUBL2. */ 5362 bias = 2; /* Fall through. */ 5363 case 1: /* USUBL. */ 5364 switch (size) 5365 { 5366 case 0: 5367 bias *= 4; 5368 for (i = 0; i < 8; i++) 5369 aarch64_set_vec_u16 (cpu, vd, i, 5370 aarch64_get_vec_u8 (cpu, vn, i + bias) 5371 - aarch64_get_vec_u8 (cpu, vm, i + bias)); 5372 break; 5373 5374 case 1: 5375 bias *= 2; 5376 for (i = 0; i < 4; i++) 5377 aarch64_set_vec_u32 (cpu, vd, i, 5378 aarch64_get_vec_u16 (cpu, vn, i + bias) 5379 - aarch64_get_vec_u16 (cpu, vm, i + bias)); 5380 break; 5381 5382 case 2: 5383 for (i = 0; i < 2; i++) 5384 aarch64_set_vec_u64 (cpu, vd, i, 5385 aarch64_get_vec_u32 (cpu, vn, i + bias) 5386 - aarch64_get_vec_u32 (cpu, vm, i + bias)); 5387 break; 5388 5389 default: 5390 HALT_UNALLOC; 5391 } 5392 break; 5393 } 5394 } 5395 5396 static void 5397 do_vec_ADDP (sim_cpu *cpu) 5398 { 5399 /* instr[31] = 0 5400 instr[30] = half(0)/full(1) 5401 instr[29,24] = 00 1110 5402 instr[23,22] = size: bytes (00), half (01), word (10), long (11) 5403 instr[21] = 1 5404 instr[20,16] = Vm 5405 instr[15,10] = 1011 11 5406 instr[9,5] = Vn 5407 instr[4,0] = V dest. */ 5408 5409 FRegister copy_vn; 5410 FRegister copy_vm; 5411 unsigned full = INSTR (30, 30); 5412 unsigned size = INSTR (23, 22); 5413 unsigned vm = INSTR (20, 16); 5414 unsigned vn = INSTR (9, 5); 5415 unsigned vd = INSTR (4, 0); 5416 unsigned i, range; 5417 5418 NYI_assert (29, 24, 0x0E); 5419 NYI_assert (21, 21, 1); 5420 NYI_assert (15, 10, 0x2F); 5421 5422 /* Make copies of the source registers in case vd == vn/vm. */ 5423 copy_vn = cpu->fr[vn]; 5424 copy_vm = cpu->fr[vm]; 5425 5426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5427 switch (size) 5428 { 5429 case 0: 5430 range = full ? 8 : 4; 5431 for (i = 0; i < range; i++) 5432 { 5433 aarch64_set_vec_u8 (cpu, vd, i, 5434 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]); 5435 aarch64_set_vec_u8 (cpu, vd, i + range, 5436 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]); 5437 } 5438 return; 5439 5440 case 1: 5441 range = full ? 4 : 2; 5442 for (i = 0; i < range; i++) 5443 { 5444 aarch64_set_vec_u16 (cpu, vd, i, 5445 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]); 5446 aarch64_set_vec_u16 (cpu, vd, i + range, 5447 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]); 5448 } 5449 return; 5450 5451 case 2: 5452 range = full ? 2 : 1; 5453 for (i = 0; i < range; i++) 5454 { 5455 aarch64_set_vec_u32 (cpu, vd, i, 5456 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]); 5457 aarch64_set_vec_u32 (cpu, vd, i + range, 5458 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]); 5459 } 5460 return; 5461 5462 case 3: 5463 if (! full) 5464 HALT_UNALLOC; 5465 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]); 5466 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]); 5467 return; 5468 } 5469 } 5470 5471 /* Floating point vector convert to longer (precision).
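FCVTL widens the lower source lanes, FCVTL2 the upper ones; only the single-to-double form is implemented below.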
*/ 5472 static void 5473 do_vec_FCVTL (sim_cpu *cpu) 5474 { 5475 /* instr[31] = 0 5476 instr[30] = half (0) / all (1) 5477 instr[29,23] = 00 1110 0 5478 instr[22] = single (0) / double (1) 5479 instr[21,10] = 10 0001 0111 10 5480 instr[9,5] = Rn 5481 instr[4,0] = Rd. */ 5482 5483 unsigned rn = INSTR (9, 5); 5484 unsigned rd = INSTR (4, 0); 5485 unsigned full = INSTR (30, 30); 5486 unsigned i; 5487 5488 NYI_assert (31, 31, 0); 5489 NYI_assert (29, 23, 0x1C); 5490 NYI_assert (21, 10, 0x85E); 5491 5492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5493 if (INSTR (22, 22)) 5494 { 5495 for (i = 0; i < 2; i++) 5496 aarch64_set_vec_double (cpu, rd, i, 5497 aarch64_get_vec_float (cpu, rn, i + 2*full)); 5498 } 5499 else 5500 { 5501 HALT_NYI; 5502 5503 #if 0 5504 /* TODO: Implement missing half-float support. */ 5505 for (i = 0; i < 4; i++) 5506 aarch64_set_vec_float (cpu, rd, i, 5507 aarch64_get_vec_halffloat (cpu, rn, i + 4*full)); 5508 #endif 5509 } 5510 } 5511 5512 static void 5513 do_vec_FABS (sim_cpu *cpu) 5514 { 5515 /* instr[31] = 0 5516 instr[30] = half(0)/full(1) 5517 instr[29,23] = 00 1110 1 5518 instr[22] = float(0)/double(1) 5519 instr[21,16] = 10 0000 5520 instr[15,10] = 1111 10 5521 instr[9,5] = Vn 5522 instr[4,0] = Vd. */ 5523 5524 unsigned vn = INSTR (9, 5); 5525 unsigned vd = INSTR (4, 0); 5526 unsigned full = INSTR (30, 30); 5527 unsigned i; 5528 5529 NYI_assert (29, 23, 0x1D); 5530 NYI_assert (21, 10, 0x83E); 5531 5532 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5533 if (INSTR (22, 22)) 5534 { 5535 if (! full) 5536 HALT_NYI; 5537 5538 for (i = 0; i < 2; i++) 5539 aarch64_set_vec_double (cpu, vd, i, 5540 fabs (aarch64_get_vec_double (cpu, vn, i))); 5541 } 5542 else 5543 { 5544 for (i = 0; i < (full ? 4 : 2); i++) 5545 aarch64_set_vec_float (cpu, vd, i, 5546 fabsf (aarch64_get_vec_float (cpu, vn, i))); 5547 } 5548 } 5549 5550 static void 5551 do_vec_FCVTZS (sim_cpu *cpu) 5552 { 5553 /* instr[31] = 0 5554 instr[30] = half (0) / all (1) 5555 instr[29,23] = 00 1110 1 5556 instr[22] = single (0) / double (1) 5557 instr[21,10] = 10 0001 1011 10 5558 instr[9,5] = Rn 5559 instr[4,0] = Rd. */ 5560 5561 unsigned rn = INSTR (9, 5); 5562 unsigned rd = INSTR (4, 0); 5563 unsigned full = INSTR (30, 30); 5564 unsigned i; 5565 5566 NYI_assert (31, 31, 0); 5567 NYI_assert (29, 23, 0x1D); 5568 NYI_assert (21, 10, 0x86E); 5569 5570 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5571 if (INSTR (22, 22)) 5572 { 5573 if (! full) 5574 HALT_UNALLOC; 5575 5576 for (i = 0; i < 2; i++) 5577 aarch64_set_vec_s64 (cpu, rd, i, 5578 (int64_t) aarch64_get_vec_double (cpu, rn, i)); 5579 } 5580 else 5581 for (i = 0; i < (full ? 4 : 2); i++) 5582 aarch64_set_vec_s32 (cpu, rd, i, 5583 (int32_t) aarch64_get_vec_float (cpu, rn, i)); 5584 } 5585 5586 static void 5587 do_vec_REV64 (sim_cpu *cpu) 5588 { 5589 /* instr[31] = 0 5590 instr[30] = full/half 5591 instr[29,24] = 00 1110 5592 instr[23,22] = size 5593 instr[21,10] = 10 0000 0000 10 5594 instr[9,5] = Rn 5595 instr[4,0] = Rd. */ 5596 5597 unsigned rn = INSTR (9, 5); 5598 unsigned rd = INSTR (4, 0); 5599 unsigned size = INSTR (23, 22); 5600 unsigned full = INSTR (30, 30); 5601 unsigned i; 5602 FRegister val; 5603 5604 NYI_assert (29, 24, 0x0E); 5605 NYI_assert (21, 10, 0x802); 5606 5607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5608 switch (size) 5609 { 5610 case 0: 5611 for (i = 0; i < (full ? 16 : 8); i++) 5612 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i); 5613 break; 5614 5615 case 1: 5616 for (i = 0; i < (full ? 
8 : 4); i++) 5617 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i); 5618 break; 5619 5620 case 2: 5621 for (i = 0; i < (full ? 4 : 2); i++) 5622 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i); 5623 break; 5624 5625 case 3: 5626 HALT_UNALLOC; 5627 } 5628 5629 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); 5630 if (full) 5631 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); 5632 } 5633 5634 static void 5635 do_vec_REV16 (sim_cpu *cpu) 5636 { 5637 /* instr[31] = 0 5638 instr[30] = full/half 5639 instr[29,24] = 00 1110 5640 instr[23,22] = size 5641 instr[21,10] = 10 0000 0001 10 5642 instr[9,5] = Rn 5643 instr[4,0] = Rd. */ 5644 5645 unsigned rn = INSTR (9, 5); 5646 unsigned rd = INSTR (4, 0); 5647 unsigned size = INSTR (23, 22); 5648 unsigned full = INSTR (30, 30); 5649 unsigned i; 5650 FRegister val; 5651 5652 NYI_assert (29, 24, 0x0E); 5653 NYI_assert (21, 10, 0x806); 5654 5655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5656 switch (size) 5657 { 5658 case 0: 5659 for (i = 0; i < (full ? 16 : 8); i++) 5660 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i); 5661 break; 5662 5663 default: 5664 HALT_UNALLOC; 5665 } 5666 5667 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); 5668 if (full) 5669 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); 5670 } 5671 5672 static void 5673 do_vec_op1 (sim_cpu *cpu) 5674 { 5675 /* instr[31] = 0 5676 instr[30] = half/full 5677 instr[29,24] = 00 1110 5678 instr[23,21] = ??? 5679 instr[20,16] = Vm 5680 instr[15,10] = sub-opcode 5681 instr[9,5] = Vn 5682 instr[4,0] = Vd */ 5683 NYI_assert (29, 24, 0x0E); 5684 5685 if (INSTR (21, 21) == 0) 5686 { 5687 if (INSTR (23, 22) == 0) 5688 { 5689 if (INSTR (30, 30) == 1 5690 && INSTR (17, 14) == 0 5691 && INSTR (12, 10) == 7) 5692 return do_vec_ins_2 (cpu); 5693 5694 switch (INSTR (15, 10)) 5695 { 5696 case 0x01: do_vec_DUP_vector_into_vector (cpu); return; 5697 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return; 5698 case 0x07: do_vec_INS (cpu); return; 5699 case 0x0B: do_vec_SMOV_into_scalar (cpu); return; 5700 case 0x0F: do_vec_UMOV_into_scalar (cpu); return; 5701 5702 case 0x00: 5703 case 0x08: 5704 case 0x10: 5705 case 0x18: 5706 do_vec_TBL (cpu); return; 5707 5708 case 0x06: 5709 case 0x16: 5710 do_vec_UZP (cpu); return; 5711 5712 case 0x0A: do_vec_TRN (cpu); return; 5713 5714 case 0x0E: 5715 case 0x1E: 5716 do_vec_ZIP (cpu); return; 5717 5718 default: 5719 HALT_NYI; 5720 } 5721 } 5722 5723 switch (INSTR (13, 10)) 5724 { 5725 case 0x6: do_vec_UZP (cpu); return; 5726 case 0xE: do_vec_ZIP (cpu); return; 5727 case 0xA: do_vec_TRN (cpu); return; 5728 default: HALT_NYI; 5729 } 5730 } 5731 5732 switch (INSTR (15, 10)) 5733 { 5734 case 0x02: do_vec_REV64 (cpu); return; 5735 case 0x06: do_vec_REV16 (cpu); return; 5736 5737 case 0x07: 5738 switch (INSTR (23, 21)) 5739 { 5740 case 1: do_vec_AND (cpu); return; 5741 case 3: do_vec_BIC (cpu); return; 5742 case 5: do_vec_ORR (cpu); return; 5743 case 7: do_vec_ORN (cpu); return; 5744 default: HALT_NYI; 5745 } 5746 5747 case 0x08: do_vec_sub_long (cpu); return; 5748 case 0x0a: do_vec_XTN (cpu); return; 5749 case 0x11: do_vec_SSHL (cpu); return; 5750 case 0x16: do_vec_CNT (cpu); return; 5751 case 0x19: do_vec_max (cpu); return; 5752 case 0x1B: do_vec_min (cpu); return; 5753 case 0x21: do_vec_add (cpu); return; 5754 case 0x25: do_vec_MLA (cpu); return; 5755 case 0x27: do_vec_mul (cpu); return; 5756 case 0x2F: do_vec_ADDP (cpu); return; 5757 case 0x30: do_vec_mull (cpu); return; 5758 case 0x33: do_vec_FMLA (cpu); return; 5759 case 0x35: do_vec_fadd (cpu); return; 5760 5761 case 0x1E: 5762 
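/* Only FCVTL is implemented in this slot; the nested switch on bits [20,16] rejects the rest.  */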
switch (INSTR (20, 16)) 5763 { 5764 case 0x01: do_vec_FCVTL (cpu); return; 5765 default: HALT_NYI; 5766 } 5767 5768 case 0x2E: 5769 switch (INSTR (20, 16)) 5770 { 5771 case 0x00: do_vec_ABS (cpu); return; 5772 case 0x01: do_vec_FCVTZS (cpu); return; 5773 case 0x11: do_vec_ADDV (cpu); return; 5774 default: HALT_NYI; 5775 } 5776 5777 case 0x31: 5778 case 0x3B: 5779 do_vec_Fminmax (cpu); return; 5780 5781 case 0x0D: 5782 case 0x0F: 5783 case 0x22: 5784 case 0x23: 5785 case 0x26: 5786 case 0x2A: 5787 case 0x32: 5788 case 0x36: 5789 case 0x39: 5790 case 0x3A: 5791 do_vec_compare (cpu); return; 5792 5793 case 0x3E: 5794 do_vec_FABS (cpu); return; 5795 5796 default: 5797 HALT_NYI; 5798 } 5799 } 5800 5801 static void 5802 do_vec_xtl (sim_cpu *cpu) 5803 { 5804 /* instr[31] = 0 5805 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11) 5806 instr[28,22] = 0 1111 00 5807 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2) 5808 instr[15,10] = 1010 01 5809 instr[9,5] = V source 5810 instr[4,0] = V dest. */ 5811 5812 unsigned vs = INSTR (9, 5); 5813 unsigned vd = INSTR (4, 0); 5814 unsigned i, shift, bias = 0; 5815 5816 NYI_assert (28, 22, 0x3C); 5817 NYI_assert (15, 10, 0x29); 5818 5819 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5820 switch (INSTR (30, 29)) 5821 { 5822 case 2: /* SXTL2, SSHLL2. */ 5823 bias = 2; /* Fall through. */ 5824 case 0: /* SXTL, SSHLL. */ 5825 if (INSTR (21, 21)) 5826 { 5827 int64_t val1, val2; 5828 5829 shift = INSTR (20, 16); 5830 /* Get the source values before setting the destination values 5831 in case the source and destination are the same.  Widen before 
 shifting so that the shifted-out bits land in the upper half. */ 5832 val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift; 5833 val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift; 5834 aarch64_set_vec_s64 (cpu, vd, 0, val1); 5835 aarch64_set_vec_s64 (cpu, vd, 1, val2); 5836 } 5837 else if (INSTR (20, 20)) 5838 { 5839 int32_t v[4]; 5840 5841 5842 shift = INSTR (19, 16); 5843 bias *= 2; 5844 for (i = 0; i < 4; i++) 5845 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift; 5846 for (i = 0; i < 4; i++) 5847 aarch64_set_vec_s32 (cpu, vd, i, v[i]); 5848 } 5849 else 5850 { 5851 int16_t v[8]; 5852 NYI_assert (19, 19, 1); 5853 5854 shift = INSTR (18, 16); 5855 bias *= 4; 5856 for (i = 0; i < 8; i++) 5857 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift; 5858 for (i = 0; i < 8; i++) 5859 aarch64_set_vec_s16 (cpu, vd, i, v[i]); 5860 } 5861 return; 5862 5863 case 3: /* UXTL2, USHLL2. */ 5864 bias = 2; /* Fall through. */ 5865 case 1: /* UXTL, USHLL.
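As in the signed case above, the position of the leading one in bits [21,16] gives the source element size and the bits below it the left-shift count (zero for a plain UXTL).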
*/ 5866 if (INSTR (21, 21)) 5867 { 5868 uint64_t v1, v2; 5869 shift = INSTR (20, 16); 5870 v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift; 5871 v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift; 5872 aarch64_set_vec_u64 (cpu, vd, 0, v1); 5873 aarch64_set_vec_u64 (cpu, vd, 1, v2); 5874 } 5875 else if (INSTR (20, 20)) 5876 { 5877 uint32_t v[4]; 5878 shift = INSTR (19, 16); 5879 bias *= 2; 5880 for (i = 0; i < 4; i++) 5881 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift; 5882 for (i = 0; i < 4; i++) 5883 aarch64_set_vec_u32 (cpu, vd, i, v[i]); 5884 } 5885 else 5886 { 5887 uint16_t v[8]; 5888 NYI_assert (19, 19, 1); 5889 5890 shift = INSTR (18, 16); 5891 bias *= 4; 5892 for (i = 0; i < 8; i++) 5893 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift; 5894 for (i = 0; i < 8; i++) 5895 aarch64_set_vec_u16 (cpu, vd, i, v[i]); 5896 } 5897 return; 5898 } 5899 } 5900 5901 static void 5902 do_vec_SHL (sim_cpu *cpu) 5903 { 5904 /* instr [31] = 0 5905 instr [30] = half(0)/full(1) 5906 instr [29,23] = 001 1110 5907 instr [22,16] = size and shift amount 5908 instr [15,10] = 01 0101 5909 instr [9, 5] = Vs 5910 instr [4, 0] = Vd. */ 5911 5912 int shift; 5913 int full = INSTR (30, 30); 5914 unsigned vs = INSTR (9, 5); 5915 unsigned vd = INSTR (4, 0); 5916 unsigned i; 5917 5918 NYI_assert (29, 23, 0x1E); 5919 NYI_assert (15, 10, 0x15); 5920 5921 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5922 if (INSTR (22, 22)) 5923 { 5924 shift = INSTR (21, 16); 5925 5926 if (full == 0) 5927 HALT_UNALLOC; 5928 5929 for (i = 0; i < 2; i++) 5930 { 5931 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i); 5932 aarch64_set_vec_u64 (cpu, vd, i, val << shift); 5933 } 5934 5935 return; 5936 } 5937 5938 if (INSTR (21, 21)) 5939 { 5940 shift = INSTR (20, 16); 5941 5942 for (i = 0; i < (full ? 4 : 2); i++) 5943 { 5944 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i); 5945 aarch64_set_vec_u32 (cpu, vd, i, val << shift); 5946 } 5947 5948 return; 5949 } 5950 5951 if (INSTR (20, 20)) 5952 { 5953 shift = INSTR (19, 16); 5954 5955 for (i = 0; i < (full ? 8 : 4); i++) 5956 { 5957 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i); 5958 aarch64_set_vec_u16 (cpu, vd, i, val << shift); 5959 } 5960 5961 return; 5962 } 5963 5964 if (INSTR (19, 19) == 0) 5965 HALT_UNALLOC; 5966 5967 shift = INSTR (18, 16); 5968 5969 for (i = 0; i < (full ? 16 : 8); i++) 5970 { 5971 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i); 5972 aarch64_set_vec_u8 (cpu, vd, i, val << shift); 5973 } 5974 } 5975 5976 static void 5977 do_vec_SSHR_USHR (sim_cpu *cpu) 5978 { 5979 /* instr [31] = 0 5980 instr [30] = half(0)/full(1) 5981 instr [29] = signed(0)/unsigned(1) 5982 instr [28,23] = 0 1111 0 5983 instr [22,16] = size and shift amount 5984 instr [15,10] = 0000 01 5985 instr [9, 5] = Vs 5986 instr [4, 0] = Vd. */ 5987 5988 int full = INSTR (30, 30); 5989 int sign = !
INSTR (29, 29); 5990 unsigned shift = INSTR (22, 16); 5991 unsigned vs = INSTR (9, 5); 5992 unsigned vd = INSTR (4, 0); 5993 unsigned i; 5994 5995 NYI_assert (28, 23, 0x1E); 5996 NYI_assert (15, 10, 0x01); 5997 5998 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5999 if (INSTR (22, 22)) 6000 { 6001 shift = 128 - shift; 6002 6003 if (full == 0) 6004 HALT_UNALLOC; 6005 6006 if (sign) 6007 for (i = 0; i < 2; i++) 6008 { 6009 int64_t val = aarch64_get_vec_s64 (cpu, vs, i); 6010 aarch64_set_vec_s64 (cpu, vd, i, val >> shift); 6011 } 6012 else 6013 for (i = 0; i < 2; i++) 6014 { 6015 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i); 6016 aarch64_set_vec_u64 (cpu, vd, i, val >> shift); 6017 } 6018 6019 return; 6020 } 6021 6022 if (INSTR (21, 21)) 6023 { 6024 shift = 64 - shift; 6025 6026 if (sign) 6027 for (i = 0; i < (full ? 4 : 2); i++) 6028 { 6029 int32_t val = aarch64_get_vec_s32 (cpu, vs, i); 6030 aarch64_set_vec_s32 (cpu, vd, i, val >> shift); 6031 } 6032 else 6033 for (i = 0; i < (full ? 4 : 2); i++) 6034 { 6035 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i); 6036 aarch64_set_vec_u32 (cpu, vd, i, val >> shift); 6037 } 6038 6039 return; 6040 } 6041 6042 if (INSTR (20, 20)) 6043 { 6044 shift = 32 - shift; 6045 6046 if (sign) 6047 for (i = 0; i < (full ? 8 : 4); i++) 6048 { 6049 int16_t val = aarch64_get_vec_s16 (cpu, vs, i); 6050 aarch64_set_vec_s16 (cpu, vd, i, val >> shift); 6051 } 6052 else 6053 for (i = 0; i < (full ? 8 : 4); i++) 6054 { 6055 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i); 6056 aarch64_set_vec_u16 (cpu, vd, i, val >> shift); 6057 } 6058 6059 return; 6060 } 6061 6062 if (INSTR (19, 19) == 0) 6063 HALT_UNALLOC; 6064 6065 shift = 16 - shift; 6066 6067 if (sign) 6068 for (i = 0; i < (full ? 16 : 8); i++) 6069 { 6070 int8_t val = aarch64_get_vec_s8 (cpu, vs, i); 6071 aarch64_set_vec_s8 (cpu, vd, i, val >> shift); 6072 } 6073 else 6074 for (i = 0; i < (full ? 16 : 8); i++) 6075 { 6076 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i); 6077 aarch64_set_vec_u8 (cpu, vd, i, val >> shift); 6078 } 6079 } 6080 6081 static void 6082 do_vec_MUL_by_element (sim_cpu *cpu) 6083 { 6084 /* instr[31] = 0 6085 instr[30] = half/full 6086 instr[29,24] = 00 1111 6087 instr[23,22] = size 6088 instr[21] = L 6089 instr[20] = M 6090 instr[19,16] = m 6091 instr[15,12] = 1000 6092 instr[11] = H 6093 instr[10] = 0 6094 instr[9,5] = Vn 6095 instr[4,0] = Vd */ 6096 6097 unsigned full = INSTR (30, 30); 6098 unsigned L = INSTR (21, 21); 6099 unsigned H = INSTR (11, 11); 6100 unsigned vn = INSTR (9, 5); 6101 unsigned vd = INSTR (4, 0); 6102 unsigned size = INSTR (23, 22); 6103 unsigned index; 6104 unsigned vm; 6105 unsigned e; 6106 6107 NYI_assert (29, 24, 0x0F); 6108 NYI_assert (15, 12, 0x8); 6109 NYI_assert (10, 10, 0); 6110 6111 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6112 switch (size) 6113 { 6114 case 1: 6115 { 6116 /* 16 bit products. */ 6117 uint16_t product; 6118 uint16_t element1; 6119 uint16_t element2; 6120 6121 index = (H << 2) | (L << 1) | INSTR (20, 20); 6122 vm = INSTR (19, 16); 6123 element2 = aarch64_get_vec_u16 (cpu, vm, index); 6124 6125 for (e = 0; e < (full ? 8 : 4); e ++) 6126 { 6127 element1 = aarch64_get_vec_u16 (cpu, vn, e); 6128 product = element1 * element2; 6129 aarch64_set_vec_u16 (cpu, vd, e, product); 6130 } 6131 } 6132 break; 6133 6134 case 2: 6135 { 6136 /* 32 bit products. 
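A 32-bit element needs only two index bits, so the index is just H:L and the full five-bit Vm field is available; the 16-bit case above also folds in the M bit and limits Vm to V0-V15.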
*/ 6137 uint32_t product; 6138 uint32_t element1; 6139 uint32_t element2; 6140 6141 index = (H << 1) | L; 6142 vm = INSTR (20, 16); 6143 element2 = aarch64_get_vec_u32 (cpu, vm, index); 6144 6145 for (e = 0; e < (full ? 4 : 2); e ++) 6146 { 6147 element1 = aarch64_get_vec_u32 (cpu, vn, e); 6148 product = element1 * element2; 6149 aarch64_set_vec_u32 (cpu, vd, e, product); 6150 } 6151 } 6152 break; 6153 6154 default: 6155 HALT_UNALLOC; 6156 } 6157 } 6158 6159 static void 6160 do_FMLA_by_element (sim_cpu *cpu) 6161 { 6162 /* instr[31] = 0 6163 instr[30] = half/full 6164 instr[29,23] = 00 1111 1 6165 instr[22] = size 6166 instr[21] = L 6167 instr[20,16] = m 6168 instr[15,12] = 0001 6169 instr[11] = H 6170 instr[10] = 0 6171 instr[9,5] = Vn 6172 instr[4,0] = Vd */ 6173 6174 unsigned full = INSTR (30, 30); 6175 unsigned size = INSTR (22, 22); 6176 unsigned L = INSTR (21, 21); 6177 unsigned vm = INSTR (20, 16); 6178 unsigned H = INSTR (11, 11); 6179 unsigned vn = INSTR (9, 5); 6180 unsigned vd = INSTR (4, 0); 6181 unsigned e; 6182 6183 NYI_assert (29, 23, 0x1F); 6184 NYI_assert (15, 12, 0x1); 6185 NYI_assert (10, 10, 0); 6186 6187 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6188 if (size) 6189 { 6190 double element1, element2; 6191 6192 if (! full || L) 6193 HALT_UNALLOC; 6194 6195 element2 = aarch64_get_vec_double (cpu, vm, H); 6196 6197 for (e = 0; e < 2; e++) 6198 { 6199 element1 = aarch64_get_vec_double (cpu, vn, e); 6200 element1 *= element2; 6201 element1 += aarch64_get_vec_double (cpu, vd, e); 6202 aarch64_set_vec_double (cpu, vd, e, element1); 6203 } 6204 } 6205 else 6206 { 6207 float element1; 6208 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L); 6209 6210 for (e = 0; e < (full ? 4 : 2); e++) 6211 { 6212 element1 = aarch64_get_vec_float (cpu, vn, e); 6213 element1 *= element2; 6214 element1 += aarch64_get_vec_float (cpu, vd, e); 6215 aarch64_set_vec_float (cpu, vd, e, element1); 6216 } 6217 } 6218 } 6219 6220 static void 6221 do_vec_op2 (sim_cpu *cpu) 6222 { 6223 /* instr[31] = 0 6224 instr[30] = half/full 6225 instr[29,24] = 00 1111 6226 instr[23] = ? 6227 instr[22,16] = element size & index 6228 instr[15,10] = sub-opcode 6229 instr[9,5] = Vm 6230 instr[4,0] = Vd */ 6231 6232 NYI_assert (29, 24, 0x0F); 6233 6234 if (INSTR (23, 23) != 0) 6235 { 6236 switch (INSTR (15, 10)) 6237 { 6238 case 0x04: 6239 case 0x06: 6240 do_FMLA_by_element (cpu); 6241 return; 6242 6243 case 0x20: 6244 case 0x22: 6245 do_vec_MUL_by_element (cpu); 6246 return; 6247 6248 default: 6249 HALT_NYI; 6250 } 6251 } 6252 else 6253 { 6254 switch (INSTR (15, 10)) 6255 { 6256 case 0x01: do_vec_SSHR_USHR (cpu); return; 6257 case 0x15: do_vec_SHL (cpu); return; 6258 case 0x20: 6259 case 0x22: do_vec_MUL_by_element (cpu); return; 6260 case 0x29: do_vec_xtl (cpu); return; 6261 default: HALT_NYI; 6262 } 6263 } 6264 } 6265 6266 static void 6267 do_vec_neg (sim_cpu *cpu) 6268 { 6269 /* instr[31] = 0 6270 instr[30] = full(1)/half(0) 6271 instr[29,24] = 10 1110 6272 instr[23,22] = size: byte(00), half (01), word (10), long (11) 6273 instr[21,10] = 1000 0010 1110 6274 instr[9,5] = Vs 6275 instr[4,0] = Vd */ 6276 6277 int full = INSTR (30, 30); 6278 unsigned vs = INSTR (9, 5); 6279 unsigned vd = INSTR (4, 0); 6280 unsigned i; 6281 6282 NYI_assert (29, 24, 0x2E); 6283 NYI_assert (21, 10, 0x82E); 6284 6285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6286 switch (INSTR (23, 22)) 6287 { 6288 case 0: 6289 for (i = 0; i < (full ? 
16 : 8); i++) 6290 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i)); 6291 return; 6292 6293 case 1: 6294 for (i = 0; i < (full ? 8 : 4); i++) 6295 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i)); 6296 return; 6297 6298 case 2: 6299 for (i = 0; i < (full ? 4 : 2); i++) 6300 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i)); 6301 return; 6302 6303 case 3: 6304 if (! full) 6305 HALT_NYI; 6306 for (i = 0; i < 2; i++) 6307 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i)); 6308 return; 6309 } 6310 } 6311 6312 static void 6313 do_vec_sqrt (sim_cpu *cpu) 6314 { 6315 /* instr[31] = 0 6316 instr[30] = full(1)/half(0) 6317 instr[29,23] = 101 1101 6318 instr[22] = single(0)/double(1) 6319 instr[21,10] = 1000 0111 1110 6320 instr[9,5] = Vs 6321 instr[4,0] = Vd. */ 6322 6323 int full = INSTR (30, 30); 6324 unsigned vs = INSTR (9, 5); 6325 unsigned vd = INSTR (4, 0); 6326 unsigned i; 6327 6328 NYI_assert (29, 23, 0x5B); 6329 NYI_assert (21, 10, 0x87E); 6330 6331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6332 if (INSTR (22, 22) == 0) 6333 for (i = 0; i < (full ? 4 : 2); i++) 6334 aarch64_set_vec_float (cpu, vd, i, 6335 sqrtf (aarch64_get_vec_float (cpu, vs, i))); 6336 else 6337 for (i = 0; i < 2; i++) 6338 aarch64_set_vec_double (cpu, vd, i, 6339 sqrt (aarch64_get_vec_double (cpu, vs, i))); 6340 } 6341 6342 static void 6343 do_vec_mls_indexed (sim_cpu *cpu) 6344 { 6345 /* instr[31] = 0 6346 instr[30] = half(0)/full(1) 6347 instr[29,24] = 10 1111 6348 instr[23,22] = 16-bit(01)/32-bit(10) 6349 instr[21,20+11] = index (if 16-bit) 6350 instr[21+11] = index (if 32-bit) 6351 instr[20,16] = Vm 6352 instr[15,12] = 0100 6353 instr[11] = part of index 6354 instr[10] = 0 6355 instr[9,5] = Vs 6356 instr[4,0] = Vd. */ 6357 6358 int full = INSTR (30, 30); 6359 unsigned vs = INSTR (9, 5); 6360 unsigned vd = INSTR (4, 0); 6361 unsigned vm = INSTR (20, 16); 6362 unsigned i; 6363 6364 NYI_assert (15, 12, 4); 6365 NYI_assert (10, 10, 0); 6366 6367 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6368 switch (INSTR (23, 22)) 6369 { 6370 case 1: 6371 { 6372 unsigned elem; 6373 uint16_t val; 6374 6375 /* With 16-bit elements the M bit joins the index (H:L:M) and 
 Vm is limited to V0-V15, as in do_vec_MUL_by_element. */ 6376 elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1) | INSTR (20, 20); 6377 vm = INSTR (19, 16); 6378 val = aarch64_get_vec_u16 (cpu, vm, elem); 6379 6380 6381 for (i = 0; i < (full ? 8 : 4); i++) 6382 aarch64_set_vec_u16 (cpu, vd, i, 6383 aarch64_get_vec_u16 (cpu, vd, i) - 6384 (aarch64_get_vec_u16 (cpu, vs, i) * val)); 6385 return; 6386 } 6387 6388 case 2: 6389 { 6390 unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21); 6391 uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem); 6392 6393 for (i = 0; i < (full ? 4 : 2); i++) 6394 aarch64_set_vec_u32 (cpu, vd, i, 6395 aarch64_get_vec_u32 (cpu, vd, i) - 6396 (aarch64_get_vec_u32 (cpu, vs, i) * val)); 6397 return; 6398 } 6399 6400 case 0: 6401 case 3: 6402 default: 6403 HALT_NYI; 6404 } 6405 } 6406 6407 static void 6408 do_vec_SUB (sim_cpu *cpu) 6409 { 6410 /* instr [31] = 0 6411 instr [30] = half(0)/full(1) 6412 instr [29,24] = 10 1110 6413 instr [23,22] = size: byte(00), half(01), word (10), long (11) 6414 instr [21] = 1 6415 instr [20,16] = Vm 6416 instr [15,10] = 10 0001 6417 instr [9, 5] = Vn 6418 instr [4, 0] = Vd.
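Lane subtraction produces the same bit pattern for signed and unsigned operands, so the signed accessors used below serve both views.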
*/ 6419 6420 unsigned full = INSTR (30, 30); 6421 unsigned vm = INSTR (20, 16); 6422 unsigned vn = INSTR (9, 5); 6423 unsigned vd = INSTR (4, 0); 6424 unsigned i; 6425 6426 NYI_assert (29, 24, 0x2E); 6427 NYI_assert (21, 21, 1); 6428 NYI_assert (15, 10, 0x21); 6429 6430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6431 switch (INSTR (23, 22)) 6432 { 6433 case 0: 6434 for (i = 0; i < (full ? 16 : 8); i++) 6435 aarch64_set_vec_s8 (cpu, vd, i, 6436 aarch64_get_vec_s8 (cpu, vn, i) 6437 - aarch64_get_vec_s8 (cpu, vm, i)); 6438 return; 6439 6440 case 1: 6441 for (i = 0; i < (full ? 8 : 4); i++) 6442 aarch64_set_vec_s16 (cpu, vd, i, 6443 aarch64_get_vec_s16 (cpu, vn, i) 6444 - aarch64_get_vec_s16 (cpu, vm, i)); 6445 return; 6446 6447 case 2: 6448 for (i = 0; i < (full ? 4 : 2); i++) 6449 aarch64_set_vec_s32 (cpu, vd, i, 6450 aarch64_get_vec_s32 (cpu, vn, i) 6451 - aarch64_get_vec_s32 (cpu, vm, i)); 6452 return; 6453 6454 case 3: 6455 if (full == 0) 6456 HALT_UNALLOC; 6457 6458 for (i = 0; i < 2; i++) 6459 aarch64_set_vec_s64 (cpu, vd, i, 6460 aarch64_get_vec_s64 (cpu, vn, i) 6461 - aarch64_get_vec_s64 (cpu, vm, i)); 6462 return; 6463 } 6464 } 6465 6466 static void 6467 do_vec_MLS (sim_cpu *cpu) 6468 { 6469 /* instr [31] = 0 6470 instr [30] = half(0)/full(1) 6471 instr [29,24] = 10 1110 6472 instr [23,22] = size: byte(00, half(01), word (10) 6473 instr [21] = 1 6474 instr [20,16] = Vm 6475 instr [15,10] = 10 0101 6476 instr [9, 5] = Vn 6477 instr [4, 0] = Vd. */ 6478 6479 unsigned full = INSTR (30, 30); 6480 unsigned vm = INSTR (20, 16); 6481 unsigned vn = INSTR (9, 5); 6482 unsigned vd = INSTR (4, 0); 6483 unsigned i; 6484 6485 NYI_assert (29, 24, 0x2E); 6486 NYI_assert (21, 21, 1); 6487 NYI_assert (15, 10, 0x25); 6488 6489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6490 switch (INSTR (23, 22)) 6491 { 6492 case 0: 6493 for (i = 0; i < (full ? 16 : 8); i++) 6494 aarch64_set_vec_u8 (cpu, vd, i, 6495 aarch64_get_vec_u8 (cpu, vd, i) 6496 - (aarch64_get_vec_u8 (cpu, vn, i) 6497 * aarch64_get_vec_u8 (cpu, vm, i))); 6498 return; 6499 6500 case 1: 6501 for (i = 0; i < (full ? 8 : 4); i++) 6502 aarch64_set_vec_u16 (cpu, vd, i, 6503 aarch64_get_vec_u16 (cpu, vd, i) 6504 - (aarch64_get_vec_u16 (cpu, vn, i) 6505 * aarch64_get_vec_u16 (cpu, vm, i))); 6506 return; 6507 6508 case 2: 6509 for (i = 0; i < (full ? 4 : 2); i++) 6510 aarch64_set_vec_u32 (cpu, vd, i, 6511 aarch64_get_vec_u32 (cpu, vd, i) 6512 - (aarch64_get_vec_u32 (cpu, vn, i) 6513 * aarch64_get_vec_u32 (cpu, vm, i))); 6514 return; 6515 6516 default: 6517 HALT_UNALLOC; 6518 } 6519 } 6520 6521 static void 6522 do_vec_FDIV (sim_cpu *cpu) 6523 { 6524 /* instr [31] = 0 6525 instr [30] = half(0)/full(1) 6526 instr [29,23] = 10 1110 0 6527 instr [22] = float()/double(1) 6528 instr [21] = 1 6529 instr [20,16] = Vm 6530 instr [15,10] = 1111 11 6531 instr [9, 5] = Vn 6532 instr [4, 0] = Vd. */ 6533 6534 unsigned full = INSTR (30, 30); 6535 unsigned vm = INSTR (20, 16); 6536 unsigned vn = INSTR (9, 5); 6537 unsigned vd = INSTR (4, 0); 6538 unsigned i; 6539 6540 NYI_assert (29, 23, 0x5C); 6541 NYI_assert (21, 21, 1); 6542 NYI_assert (15, 10, 0x3F); 6543 6544 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6545 if (INSTR (22, 22)) 6546 { 6547 if (! full) 6548 HALT_UNALLOC; 6549 6550 for (i = 0; i < 2; i++) 6551 aarch64_set_vec_double (cpu, vd, i, 6552 aarch64_get_vec_double (cpu, vn, i) 6553 / aarch64_get_vec_double (cpu, vm, i)); 6554 } 6555 else 6556 for (i = 0; i < (full ? 
static void
do_vec_FDIV (sim_cpu *cpu)
{
  /* instr [31] = 0
     instr [30] = half(0)/full(1)
     instr [29,23] = 10 1110 0
     instr [22] = float(0)/double(1)
     instr [21] = 1
     instr [20,16] = Vm
     instr [15,10] = 1111 11
     instr [9, 5] = Vn
     instr [4, 0] = Vd.  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned i;

  NYI_assert (29, 23, 0x5C);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x3F);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      if (! full)
        HALT_UNALLOC;

      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                aarch64_get_vec_double (cpu, vn, i)
                                / aarch64_get_vec_double (cpu, vm, i));
    }
  else
    for (i = 0; i < (full ? 4 : 2); i++)
      aarch64_set_vec_float (cpu, vd, i,
                             aarch64_get_vec_float (cpu, vn, i)
                             / aarch64_get_vec_float (cpu, vm, i));
}

static void
do_vec_FMUL (sim_cpu *cpu)
{
  /* instr [31] = 0
     instr [30] = half(0)/full(1)
     instr [29,23] = 10 1110 0
     instr [22] = float(0)/double(1)
     instr [21] = 1
     instr [20,16] = Vm
     instr [15,10] = 1101 11
     instr [9, 5] = Vn
     instr [4, 0] = Vd.  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned i;

  NYI_assert (29, 23, 0x5C);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x37);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      if (! full)
        HALT_UNALLOC;

      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                aarch64_get_vec_double (cpu, vn, i)
                                * aarch64_get_vec_double (cpu, vm, i));
    }
  else
    for (i = 0; i < (full ? 4 : 2); i++)
      aarch64_set_vec_float (cpu, vd, i,
                             aarch64_get_vec_float (cpu, vn, i)
                             * aarch64_get_vec_float (cpu, vm, i));
}

static void
do_vec_FADDP (sim_cpu *cpu)
{
  /* instr [31] = 0
     instr [30] = half(0)/full(1)
     instr [29,23] = 10 1110 0
     instr [22] = float(0)/double(1)
     instr [21] = 1
     instr [20,16] = Vm
     instr [15,10] = 1101 01
     instr [9, 5] = Vn
     instr [4, 0] = Vd.  */

  unsigned full = INSTR (30, 30);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  NYI_assert (29, 23, 0x5C);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x35);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      /* Extract values before adding them in case vd == vn/vm.  */
      double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
      double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
      double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
      double tmp4 = aarch64_get_vec_double (cpu, vm, 1);

      if (! full)
        HALT_UNALLOC;

      aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
      aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
    }
  else
    {
      /* Extract values before adding them in case vd == vn/vm.  */
      float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
      float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
      float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
      float tmp6 = aarch64_get_vec_float (cpu, vm, 1);

      if (full)
        {
          float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
          float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
          float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
          float tmp8 = aarch64_get_vec_float (cpu, vm, 3);

          aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
          aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
          aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
          aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
        }
      else
        {
          aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
          aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
        }
    }
}
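/* The pairwise layout used by FADDP above is worth spelling out: for
   a full-width 4S operation the result lanes are

     vd.s[0] = vn.s[0] + vn.s[1]    vd.s[1] = vn.s[2] + vn.s[3]
     vd.s[2] = vm.s[0] + vm.s[1]    vd.s[3] = vm.s[2] + vm.s[3]

   i.e. the concatenation of Vn and Vm is reduced in adjacent pairs.  */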
static void
do_vec_FSQRT (sim_cpu *cpu)
{
  /* instr[31] = 0
     instr[30] = half(0)/full(1)
     instr[29,23] = 10 1110 1
     instr[22] = single(0)/double(1)
     instr[21,10] = 10 0001 1111 10
     instr[9,5] = Vsrc
     instr[4,0] = Vdest.  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);
  int i;

  NYI_assert (29, 23, 0x5D);
  NYI_assert (21, 10, 0x87E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      if (! full)
        HALT_UNALLOC;

      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                sqrt (aarch64_get_vec_double (cpu, vn, i)));
    }
  else
    {
      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_float (cpu, vd, i,
                               sqrtf (aarch64_get_vec_float (cpu, vn, i)));
    }
}

static void
do_vec_FNEG (sim_cpu *cpu)
{
  /* instr[31] = 0
     instr[30] = half (0)/full (1)
     instr[29,23] = 10 1110 1
     instr[22] = single (0)/double (1)
     instr[21,10] = 10 0000 1111 10
     instr[9,5] = Vsrc
     instr[4,0] = Vdest.  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);
  int i;

  NYI_assert (29, 23, 0x5D);
  NYI_assert (21, 10, 0x83E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      if (! full)
        HALT_UNALLOC;

      for (i = 0; i < 2; i++)
        aarch64_set_vec_double (cpu, vd, i,
                                - aarch64_get_vec_double (cpu, vn, i));
    }
  else
    {
      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_float (cpu, vd, i,
                               - aarch64_get_vec_float (cpu, vn, i));
    }
}

static void
do_vec_NOT (sim_cpu *cpu)
{
  /* instr[31] = 0
     instr[30] = half (0)/full (1)
     instr[29,10] = 10 1110 0010 0000 0101 10
     instr[9,5] = Vn
     instr[4,0] = Vd.  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned i;
  int full = INSTR (30, 30);

  NYI_assert (29, 10, 0xB8816);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = 0; i < (full ? 16 : 8); i++)
    aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
}

/* Count the leading zero bits in the least significant SIZE bits
   of VAL.  Returns SIZE when VAL is zero.  */
static unsigned int
clz (uint64_t val, unsigned size)
{
  uint64_t mask = 1;
  int count;

  mask <<= (size - 1);
  count = 0;
  do
    {
      if (val & mask)
        break;
      mask >>= 1;
      count ++;
    }
  while (mask);

  return count;
}
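/* For example, clz (0x00F0, 16) returns 8, and clz (0, N) returns N
   because the loop only stops early when it finds a set bit.  */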
static void
do_vec_CLZ (sim_cpu *cpu)
{
  /* instr[31] = 0
     instr[30] = half (0)/full (1)
     instr[29,24] = 10 1110
     instr[23,22] = size
     instr[21,10] = 10 0000 0100 10
     instr[9,5] = Vn
     instr[4,0] = Vd.  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned i;
  int full = INSTR (30, 30);

  NYI_assert (29, 24, 0x2E);
  NYI_assert (21, 10, 0x812);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
    {
    case 0:
      for (i = 0; i < (full ? 16 : 8); i++)
        aarch64_set_vec_u8 (cpu, vd, i,
                            clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
      break;
    case 1:
      for (i = 0; i < (full ? 8 : 4); i++)
        aarch64_set_vec_u16 (cpu, vd, i,
                             clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
      break;
    case 2:
      for (i = 0; i < (full ? 4 : 2); i++)
        aarch64_set_vec_u32 (cpu, vd, i,
                             clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
      break;
    case 3:
      if (! full)
        HALT_UNALLOC;
      aarch64_set_vec_u64 (cpu, vd, 0,
                           clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
      aarch64_set_vec_u64 (cpu, vd, 1,
                           clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
      break;
    }
}

static void
do_vec_MOV_element (sim_cpu *cpu)
{
  /* instr[31,21] = 0110 1110 000
     instr[20,16] = size & dest index
     instr[15] = 0
     instr[14,11] = source index
     instr[10] = 1
     instr[9,5] = Vs
     instr[4,0] = Vd.  */

  unsigned vs = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned src_index;
  unsigned dst_index;

  NYI_assert (31, 21, 0x370);
  NYI_assert (15, 15, 0);
  NYI_assert (10, 10, 1);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (16, 16))
    {
      /* Move a byte.  */
      src_index = INSTR (14, 11);
      dst_index = INSTR (20, 17);
      aarch64_set_vec_u8 (cpu, vd, dst_index,
                          aarch64_get_vec_u8 (cpu, vs, src_index));
    }
  else if (INSTR (17, 17))
    {
      /* Move 16-bits.  */
      NYI_assert (11, 11, 0);
      src_index = INSTR (14, 12);
      dst_index = INSTR (20, 18);
      aarch64_set_vec_u16 (cpu, vd, dst_index,
                           aarch64_get_vec_u16 (cpu, vs, src_index));
    }
  else if (INSTR (18, 18))
    {
      /* Move 32-bits.  */
      NYI_assert (12, 11, 0);
      src_index = INSTR (14, 13);
      dst_index = INSTR (20, 19);
      aarch64_set_vec_u32 (cpu, vd, dst_index,
                           aarch64_get_vec_u32 (cpu, vs, src_index));
    }
  else
    {
      NYI_assert (19, 19, 1);
      NYI_assert (13, 11, 0);
      src_index = INSTR (14, 14);
      dst_index = INSTR (20, 20);
      aarch64_set_vec_u64 (cpu, vd, dst_index,
                           aarch64_get_vec_u64 (cpu, vs, src_index));
    }
}

static void
do_vec_REV32 (sim_cpu *cpu)
{
  /* instr[31] = 0
     instr[30] = full/half
     instr[29,24] = 10 1110
     instr[23,22] = size
     instr[21,10] = 10 0000 0000 10
     instr[9,5] = Rn
     instr[4,0] = Rd.  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned size = INSTR (23, 22);
  unsigned full = INSTR (30, 30);
  unsigned i;
  FRegister val;

  NYI_assert (29, 24, 0x2E);
  NYI_assert (21, 10, 0x802);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      for (i = 0; i < (full ? 16 : 8); i++)
        val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
      break;

    case 1:
      for (i = 0; i < (full ? 8 : 4); i++)
        val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
      break;

    default:
      HALT_UNALLOC;
    }

  aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
  if (full)
    aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
}
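/* REV32 reverses the byte (or halfword) order within each 32-bit
   word, which the loops above implement by flipping the low index
   bits: for byte elements, lane i of the source lands in lane i ^ 3
   of the destination, so bytes 0,1,2,3 become 3,2,1,0.  */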
static void
do_vec_EXT (sim_cpu *cpu)
{
  /* instr[31] = 0
     instr[30] = full/half
     instr[29,21] = 10 1110 000
     instr[20,16] = Vm
     instr[15] = 0
     instr[14,11] = source index
     instr[10] = 0
     instr[9,5] = Vn
     instr[4,0] = Vd.  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned src_index = INSTR (14, 11);
  unsigned full = INSTR (30, 30);
  unsigned i;
  unsigned j;
  FRegister val;

  NYI_assert (31, 21, 0x370);
  NYI_assert (15, 15, 0);
  NYI_assert (10, 10, 0);

  if (!full && (src_index & 0x8))
    HALT_UNALLOC;

  j = 0;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  for (i = src_index; i < (full ? 16 : 8); i++)
    val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
  for (i = 0; i < src_index; i++)
    val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);

  aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
  if (full)
    aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
}

static void
dexAdvSIMD0 (sim_cpu *cpu)
{
  /* instr [28,25] = 0 111.  */
  if (INSTR (15, 10) == 0x07
      && (INSTR (9, 5) == INSTR (20, 16)))
    {
      if (INSTR (31, 21) == 0x075
          || INSTR (31, 21) == 0x275)
        {
          do_vec_MOV_whole_vector (cpu);
          return;
        }
    }

  if (INSTR (29, 19) == 0x1E0)
    {
      do_vec_MOV_immediate (cpu);
      return;
    }

  if (INSTR (29, 19) == 0x5E0)
    {
      do_vec_MVNI (cpu);
      return;
    }

  if (INSTR (29, 19) == 0x1C0
      || INSTR (29, 19) == 0x1C1)
    {
      if (INSTR (15, 10) == 0x03)
        {
          do_vec_DUP_scalar_into_vector (cpu);
          return;
        }
    }

  switch (INSTR (29, 24))
    {
    case 0x0E: do_vec_op1 (cpu); return;
    case 0x0F: do_vec_op2 (cpu); return;

    case 0x2E:
      if (INSTR (21, 21) == 1)
        {
          switch (INSTR (15, 10))
            {
            case 0x02:
              do_vec_REV32 (cpu);
              return;

            case 0x07:
              switch (INSTR (23, 22))
                {
                case 0: do_vec_EOR (cpu); return;
                case 1: do_vec_BSL (cpu); return;
                case 2:
                case 3: do_vec_bit (cpu); return;
                }
              break;

            case 0x08: do_vec_sub_long (cpu); return;
            case 0x11: do_vec_USHL (cpu); return;
            case 0x12: do_vec_CLZ (cpu); return;
            case 0x16: do_vec_NOT (cpu); return;
            case 0x19: do_vec_max (cpu); return;
            case 0x1B: do_vec_min (cpu); return;
            case 0x21: do_vec_SUB (cpu); return;
            case 0x25: do_vec_MLS (cpu); return;
            case 0x31: do_vec_FminmaxNMP (cpu); return;
            case 0x35: do_vec_FADDP (cpu); return;
            case 0x37: do_vec_FMUL (cpu); return;
            case 0x3F: do_vec_FDIV (cpu); return;

            case 0x3E:
              switch (INSTR (20, 16))
                {
                case 0x00: do_vec_FNEG (cpu); return;
                case 0x01: do_vec_FSQRT (cpu); return;
                default: HALT_NYI;
                }

            case 0x0D:
            case 0x0F:
            case 0x22:
            case 0x23:
            case 0x26:
            case 0x2A:
            case 0x32:
            case 0x36:
            case 0x39:
            case 0x3A:
              do_vec_compare (cpu); return;

            default:
              break;
            }
        }

      if (INSTR (31, 21) == 0x370)
        {
          if (INSTR (10, 10))
            do_vec_MOV_element (cpu);
          else
            do_vec_EXT (cpu);
          return;
        }

      switch (INSTR (21, 10))
        {
        case 0x82E: do_vec_neg (cpu); return;
        case 0x87E: do_vec_sqrt (cpu); return;
        default:
          if (INSTR (15, 10) == 0x30)
            {
              do_vec_mull (cpu);
              return;
            }
          break;
        }
      break;

    case 0x2F:
      switch (INSTR (15, 10))
        {
        case 0x01: do_vec_SSHR_USHR (cpu); return;
        case 0x10:
        case 0x12: do_vec_mls_indexed (cpu); return;
        case 0x29: do_vec_xtl (cpu); return;
        default:
          HALT_NYI;
        }

    default:
      break;
    }

  HALT_NYI;
}

/* 3 sources.  */
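/* Note that the multiply-accumulate emulations below evaluate the C
   expression a + n * m, in which the product is rounded before the
   addition.  A hardware FMADD fuses the two steps with a single
   rounding, so results can differ from real silicon in the last bit.  */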
/* Float multiply add.  */
static void
fmadds (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
                        + aarch64_get_FP_float (cpu, sn)
                        * aarch64_get_FP_float (cpu, sm));
}

/* Double multiply add.  */
static void
fmaddd (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
                         + aarch64_get_FP_double (cpu, sn)
                         * aarch64_get_FP_double (cpu, sm));
}

/* Float multiply subtract.  */
static void
fmsubs (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
                        - aarch64_get_FP_float (cpu, sn)
                        * aarch64_get_FP_float (cpu, sm));
}

/* Double multiply subtract.  */
static void
fmsubd (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
                         - aarch64_get_FP_double (cpu, sn)
                         * aarch64_get_FP_double (cpu, sm));
}

/* Float negative multiply add.  */
static void
fnmadds (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
                        + (- aarch64_get_FP_float (cpu, sn))
                        * aarch64_get_FP_float (cpu, sm));
}

/* Double negative multiply add.  */
static void
fnmaddd (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
                         + (- aarch64_get_FP_double (cpu, sn))
                         * aarch64_get_FP_double (cpu, sm));
}

/* Float negative multiply subtract.  */
static void
fnmsubs (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
                        + aarch64_get_FP_float (cpu, sn)
                        * aarch64_get_FP_float (cpu, sm));
}
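/* A quick sanity check on the signs: fnmadds computes
   -sa + (-sn) * sm, which is -(sa + sn * sm), matching FNMADD, while
   fnmsubs computes -sa + sn * sm, matching FNMSUB.  */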
/* Double negative multiply subtract.  */
static void
fnmsubd (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
                         + aarch64_get_FP_double (cpu, sn)
                         * aarch64_get_FP_double (cpu, sm));
}

static void
dexSimpleFPDataProc3Source (sim_cpu *cpu)
{
  /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
     instr[30] = 0
     instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[28,25] = 1111
     instr[24] = 1
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
     instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
     instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB  */

  uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
  /* Dispatch on combined type:o1:o2.  */
  uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);

  if (M_S != 0)
    HALT_UNALLOC;

  switch (dispatch)
    {
    case 0: fmadds (cpu); return;
    case 1: fmsubs (cpu); return;
    case 2: fnmadds (cpu); return;
    case 3: fnmsubs (cpu); return;
    case 4: fmaddd (cpu); return;
    case 5: fmsubd (cpu); return;
    case 6: fnmaddd (cpu); return;
    case 7: fnmsubd (cpu); return;
    default:
      /* type > 1 is currently unallocated.  */
      HALT_UNALLOC;
    }
}

static void
dexSimpleFPFixedConvert (sim_cpu *cpu)
{
  HALT_NYI;
}

static void
dexSimpleFPCondCompare (sim_cpu *cpu)
{
  /* instr [31,23] = 0001 1110 0
     instr [22] = type
     instr [21] = 1
     instr [20,16] = Rm
     instr [15,12] = condition
     instr [11,10] = 01
     instr [9,5] = Rn
     instr [4] = 0
     instr [3,0] = nzcv  */

  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);

  NYI_assert (31, 23, 0x3C);
  NYI_assert (11, 10, 0x1);
  NYI_assert (4, 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (! testConditionCode (cpu, INSTR (15, 12)))
    {
      aarch64_set_CPSR (cpu, INSTR (3, 0));
      return;
    }

  if (INSTR (22, 22))
    {
      /* Double precision.  */
      double val1 = aarch64_get_vec_double (cpu, rn, 0);
      double val2 = aarch64_get_vec_double (cpu, rm, 0);

      /* FIXME: Check for NaNs.  */
      if (val1 == val2)
        aarch64_set_CPSR (cpu, (Z | C));
      else if (val1 < val2)
        aarch64_set_CPSR (cpu, N);
      else /* val1 > val2.  */
        aarch64_set_CPSR (cpu, C);
    }
  else
    {
      /* Single precision.  */
      float val1 = aarch64_get_vec_float (cpu, rn, 0);
      float val2 = aarch64_get_vec_float (cpu, rm, 0);

      /* FIXME: Check for NaNs.  */
      if (val1 == val2)
        aarch64_set_CPSR (cpu, (Z | C));
      else if (val1 < val2)
        aarch64_set_CPSR (cpu, N);
      else /* val1 > val2.  */
        aarch64_set_CPSR (cpu, C);
    }
}
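/* The flag settings above follow the AArch64 floating point compare
   convention: equal sets Z|C (NZCV = 0110), less than sets N
   (NZCV = 1000) and greater than sets C (NZCV = 0010).  Unordered
   operands should set C|V (NZCV = 0011); that is the missing NaN
   handling flagged by the FIXMEs.  */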
/* 2 sources.  */

/* Float add.  */
static void
fadds (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
                        + aarch64_get_FP_float (cpu, sm));
}

/* Double add.  */
static void
faddd (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
                         + aarch64_get_FP_double (cpu, sm));
}

/* Float divide.  */
static void
fdivs (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
                        / aarch64_get_FP_float (cpu, sm));
}

/* Double divide.  */
static void
fdivd (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
                         / aarch64_get_FP_double (cpu, sm));
}

/* Float multiply.  */
static void
fmuls (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
                        * aarch64_get_FP_float (cpu, sm));
}

/* Double multiply.  */
static void
fmuld (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
                         * aarch64_get_FP_double (cpu, sm));
}

/* Float negate and multiply.  */
static void
fnmuls (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
                                    * aarch64_get_FP_float (cpu, sm)));
}

/* Double negate and multiply.  */
static void
fnmuld (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
                                     * aarch64_get_FP_double (cpu, sm)));
}

/* Float subtract.  */
static void
fsubs (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
                        - aarch64_get_FP_float (cpu, sm));
}
/* Double subtract.  */
static void
fsubd (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
                         - aarch64_get_FP_double (cpu, sm));
}

static void
do_FMINNM (sim_cpu *cpu)
{
  /* instr[31,23] = 0 0011 1100
     instr[22] = float(0)/double(1)
     instr[21] = 1
     instr[20,16] = Sm
     instr[15,10] = 01 1110
     instr[9,5] = Sn
     instr[4,0] = Sd  */

  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  NYI_assert (31, 23, 0x03C);
  NYI_assert (15, 10, 0x1E);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    aarch64_set_FP_double (cpu, sd,
                           dminnm (aarch64_get_FP_double (cpu, sn),
                                   aarch64_get_FP_double (cpu, sm)));
  else
    aarch64_set_FP_float (cpu, sd,
                          fminnm (aarch64_get_FP_float (cpu, sn),
                                  aarch64_get_FP_float (cpu, sm)));
}

static void
do_FMAXNM (sim_cpu *cpu)
{
  /* instr[31,23] = 0 0011 1100
     instr[22] = float(0)/double(1)
     instr[21] = 1
     instr[20,16] = Sm
     instr[15,10] = 01 1010
     instr[9,5] = Sn
     instr[4,0] = Sd  */

  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  NYI_assert (31, 23, 0x03C);
  NYI_assert (15, 10, 0x1A);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    aarch64_set_FP_double (cpu, sd,
                           dmaxnm (aarch64_get_FP_double (cpu, sn),
                                   aarch64_get_FP_double (cpu, sm)));
  else
    aarch64_set_FP_float (cpu, sd,
                          fmaxnm (aarch64_get_FP_float (cpu, sn),
                                  aarch64_get_FP_float (cpu, sm)));
}

static void
dexSimpleFPDataProc2Source (sim_cpu *cpu)
{
  /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
     instr[30] = 0
     instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[28,25] = 1111
     instr[24] = 0
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
     instr[21] = 1
     instr[20,16] = Vm
     instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
                               0010 ==> FADD, 0011 ==> FSUB,
                               0100 ==> FMAX, 0101 ==> FMIN
                               0110 ==> FMAXNM, 0111 ==> FMINNM
                               1000 ==> FNMUL, ow ==> UNALLOC
     instr[11,10] = 10
     instr[9,5] = Vn
     instr[4,0] = Vd  */

  uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
  uint32_t type = INSTR (23, 22);
  /* Dispatch on opcode.  */
  uint32_t dispatch = INSTR (15, 12);

  if (type > 1)
    HALT_UNALLOC;

  if (M_S != 0)
    HALT_UNALLOC;

  if (type)
    switch (dispatch)
      {
      case 0: fmuld (cpu); return;
      case 1: fdivd (cpu); return;
      case 2: faddd (cpu); return;
      case 3: fsubd (cpu); return;
      case 6: do_FMAXNM (cpu); return;
      case 7: do_FMINNM (cpu); return;
      case 8: fnmuld (cpu); return;

      /* Have not yet implemented fmax and fmin.  */
      case 4:
      case 5:
        HALT_NYI;

      default:
        HALT_UNALLOC;
      }
  else /* type == 0 => floats.  */
    switch (dispatch)
      {
      case 0: fmuls (cpu); return;
      case 1: fdivs (cpu); return;
      case 2: fadds (cpu); return;
      case 3: fsubs (cpu); return;
      case 6: do_FMAXNM (cpu); return;
      case 7: do_FMINNM (cpu); return;
      case 8: fnmuls (cpu); return;

      case 4:
      case 5:
        HALT_NYI;

      default:
        HALT_UNALLOC;
      }
}
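/* FMINNM/FMAXNM follow the IEEE 754-2008 minNum/maxNum semantics:
   when exactly one operand is a quiet NaN the numeric operand is
   returned, so FMINNM (NaN, 3.0) yields 3.0.  The dminnm/fminnm and
   dmaxnm/fmaxnm helpers used above are assumed to implement that
   rule.  */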
static void
dexSimpleFPCondSelect (sim_cpu *cpu)
{
  /* FCSEL
     instr[31,23] = 0 0011 1100
     instr[22] = 0=>single 1=>double
     instr[21] = 1
     instr[20,16] = Sm
     instr[15,12] = cond
     instr[11,10] = 11
     instr[9,5] = Sn
     instr[4,0] = Sd  */
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);
  uint32_t set = testConditionCode (cpu, INSTR (15, 12));

  NYI_assert (31, 23, 0x03C);
  NYI_assert (11, 10, 0x3);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
                                     : aarch64_get_FP_double (cpu, sm)));
  else
    aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
                                    : aarch64_get_FP_float (cpu, sm)));
}

/* Store 32 bit unscaled signed 9 bit.  */
static void
fsturs (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_vec_u32 (cpu, st, 0));
}

/* Store 64 bit unscaled signed 9 bit.  */
static void
fsturd (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_vec_u64 (cpu, st, 0));
}

/* Store 128 bit unscaled signed 9 bit.  */
static void
fsturq (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_FP_long_double (cpu, st, & a);
  aarch64_set_mem_long_double (cpu,
                               aarch64_get_reg_u64 (cpu, rn, SP_OK)
                               + offset, a);
}
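/* These "unscaled" forms take a raw signed 9-bit byte offset rather
   than an offset scaled by the transfer size, so e.g. STUR S0,
   [X1, #-3] stores 4 bytes at X1 - 3, an address the scaled
   immediate forms could not express.  */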
/* TODO FP move register.  */

/* 32 bit fp to fp move register.  */
static void
ffmovs (sim_cpu *cpu)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
}

/* 64 bit fp to fp move register.  */
static void
ffmovd (sim_cpu *cpu)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
}

/* 32 bit GReg to Vec move register.  */
static void
fgmovs (sim_cpu *cpu)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
}

/* 64 bit g to fp move register.  */
static void
fgmovd (sim_cpu *cpu)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
}

/* 32 bit fp to g move register.  */
static void
gfmovs (sim_cpu *cpu)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
}

/* 64 bit fp to g move register.  */
static void
gfmovd (sim_cpu *cpu)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
}

/* FP move immediate

   These install an immediate 8 bit value in the target register
   where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
   bit exponent.  */

static void
fmovs (sim_cpu *cpu)
{
  unsigned int sd = INSTR (4, 0);
  uint32_t imm = INSTR (20, 13);
  float f = fp_immediate_for_encoding_32 (imm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, f);
}

static void
fmovd (sim_cpu *cpu)
{
  unsigned int sd = INSTR (4, 0);
  uint32_t imm = INSTR (20, 13);
  double d = fp_immediate_for_encoding_64 (imm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, d);
}

static void
dexSimpleFPImmediate (sim_cpu *cpu)
{
  /* instr[31,23] == 00111100
     instr[22] == type : single(0)/double(1)
     instr[21] == 1
     instr[20,13] == imm8
     instr[12,10] == 100
     instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
     instr[4,0] == Rd  */
  uint32_t imm5 = INSTR (9, 5);

  NYI_assert (31, 23, 0x3C);

  if (imm5 != 0)
    HALT_UNALLOC;

  if (INSTR (22, 22))
    fmovd (cpu);
  else
    fmovs (cpu);
}
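/* As an example of the imm8 encoding handled above: imm8 = 0x70
   (sign 0, exponent bits 111, fraction 0000) expands to 1.0, so
   "FMOV S0, #1.0" is encoded with imm8 = 0x70.  */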
/* TODO specific decode and execute for group Load Store.  */

/* TODO FP load/store single register (unscaled offset).  */

/* TODO load 8 bit unscaled signed 9 bit.  */
/* TODO load 16 bit unscaled signed 9 bit.  */

/* Load 32 bit unscaled signed 9 bit.  */
static void
fldurs (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* Load 64 bit unscaled signed 9 bit.  */
static void
fldurd (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* Load 128 bit unscaled signed 9 bit.  */
static void
fldurq (sim_cpu *cpu, int32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int st = INSTR (4, 0);
  FRegister a;
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* TODO store 8 bit unscaled signed 9 bit.  */
/* TODO store 16 bit unscaled signed 9 bit.  */


/* 1 source.  */

/* Float absolute value.  */
static void
fabss (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);
  float value = aarch64_get_FP_float (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, fabsf (value));
}

/* Double absolute value.  */
static void
fabsd (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);
  double value = aarch64_get_FP_double (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, fabs (value));
}

/* Float negative value.  */
static void
fnegs (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
}

/* Double negative value.  */
static void
fnegd (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
}

/* Float square root.  */
static void
fsqrts (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
}

/* Double square root.  */
static void
fsqrtd (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd,
                         sqrt (aarch64_get_FP_double (cpu, sn)));
}

/* Convert double to float.  */
static void
fcvtds (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
}
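/* fcvtds narrows with the usual C round-to-nearest conversion; for
   instance (float) (1.0 + 1e-9) == 1.0f, because 1e-9 is well below
   half of the 2^-23 spacing between single-precision values near
   1.0.  */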
/* Convert float to double.  */
static void
fcvtsd (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
}

static void
do_FRINT (sim_cpu *cpu)
{
  /* instr[31,23] = 0001 1110 0
     instr[22] = single(0)/double(1)
     instr[21,18] = 1001
     instr[17,15] = rounding mode
     instr[14,10] = 10000
     instr[9,5] = source
     instr[4,0] = dest  */

  float val;
  unsigned rs = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  unsigned int rmode = INSTR (17, 15);

  NYI_assert (31, 23, 0x03C);
  NYI_assert (21, 18, 0x9);
  NYI_assert (14, 10, 0x10);

  if (rmode == 6 || rmode == 7)
    /* FIXME: Add support for rmode == 6 exactness check.  */
    rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      double val = aarch64_get_FP_double (cpu, rs);

      switch (rmode)
        {
        case 0: /* mode N: nearest or even.  */
          {
            double rval = round (val);

            if (val - rval == 0.5)
              {
                if (((rval / 2.0) * 2.0) != rval)
                  rval += 1.0;
              }

            aarch64_set_FP_double (cpu, rd, rval);
            return;
          }

        case 1: /* mode P: towards +inf.  */
          aarch64_set_FP_double (cpu, rd, ceil (val));
          return;

        case 2: /* mode M: towards -inf.  */
          aarch64_set_FP_double (cpu, rd, floor (val));
          return;

        case 3: /* mode Z: towards 0.  */
          aarch64_set_FP_double (cpu, rd, trunc (val));
          return;

        case 4: /* mode A: away from 0.  */
          aarch64_set_FP_double (cpu, rd, round (val));
          return;

        case 6: /* mode X: use FPCR with exactness check.  */
        case 7: /* mode I: use FPCR mode.  */
          HALT_NYI;

        default:
          HALT_UNALLOC;
        }
    }

  val = aarch64_get_FP_float (cpu, rs);

  switch (rmode)
    {
    case 0: /* mode N: nearest or even.  */
      {
        float rval = roundf (val);

        if (val - rval == 0.5)
          {
            if (((rval / 2.0) * 2.0) != rval)
              rval += 1.0;
          }

        aarch64_set_FP_float (cpu, rd, rval);
        return;
      }

    case 1: /* mode P: towards +inf.  */
      aarch64_set_FP_float (cpu, rd, ceilf (val));
      return;

    case 2: /* mode M: towards -inf.  */
      aarch64_set_FP_float (cpu, rd, floorf (val));
      return;

    case 3: /* mode Z: towards 0.  */
      aarch64_set_FP_float (cpu, rd, truncf (val));
      return;

    case 4: /* mode A: away from 0.  */
      aarch64_set_FP_float (cpu, rd, roundf (val));
      return;

    case 6: /* mode X: use FPCR with exactness check.  */
    case 7: /* mode I: use FPCR mode.  */
      HALT_NYI;

    default:
      HALT_UNALLOC;
    }
}
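/* Worked example for the rounding modes above, with val = -2.5:
   towards +inf gives -2.0, towards -inf gives -3.0, towards zero
   gives -2.0, and away from zero gives -3.0 (round () rounds halfway
   cases away from zero).  */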
/* Convert half to float.  */
static void
do_FCVT_half_to_single (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x7B890);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
}

/* Convert half to double.  */
static void
do_FCVT_half_to_double (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x7B8B0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
}

/* Convert single to half.  */
static void
do_FCVT_single_to_half (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x788F0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
}

/* Convert double to half.  */
static void
do_FCVT_double_to_half (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 10, 0x798F0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
}

static void
dexSimpleFPDataProc1Source (sim_cpu *cpu)
{
  /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
     instr[30] = 0
     instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[28,25] = 1111
     instr[24] = 0
     instr[23,22] ==> type : 00 ==> source is single,
                             01 ==> source is double
                             10 ==> UNALLOC
                             11 ==> UNALLOC or source is half
     instr[21] = 1
     instr[20,15] ==> opcode : with type 00 or 01
                               000000 ==> FMOV, 000001 ==> FABS,
                               000010 ==> FNEG, 000011 ==> FSQRT,
                               000100 ==> UNALLOC, 000101 ==> FCVT (to single/double),
                               000110 ==> UNALLOC, 000111 ==> FCVT (to half),
                               001000 ==> FRINTN, 001001 ==> FRINTP,
                               001010 ==> FRINTM, 001011 ==> FRINTZ,
                               001100 ==> FRINTA, 001101 ==> UNALLOC,
                               001110 ==> FRINTX, 001111 ==> FRINTI
                               with type 11
                               000100 ==> FCVT (half-to-single)
                               000101 ==> FCVT (half-to-double)
     instr[14,10] = 10000.  */

  uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
  uint32_t type = INSTR (23, 22);
  uint32_t opcode = INSTR (20, 15);

  if (M_S != 0)
    HALT_UNALLOC;

  if (type == 3)
    {
      if (opcode == 4)
        do_FCVT_half_to_single (cpu);
      else if (opcode == 5)
        do_FCVT_half_to_double (cpu);
      else
        HALT_UNALLOC;
      return;
    }

  if (type == 2)
    HALT_UNALLOC;

  switch (opcode)
    {
    case 0:
      if (type)
        ffmovd (cpu);
      else
        ffmovs (cpu);
      return;

    case 1:
      if (type)
        fabsd (cpu);
      else
        fabss (cpu);
      return;

    case 2:
      if (type)
        fnegd (cpu);
      else
        fnegs (cpu);
      return;

    case 3:
      if (type)
        fsqrtd (cpu);
      else
        fsqrts (cpu);
      return;

    case 4:
      if (type)
        fcvtds (cpu);
      else
        HALT_UNALLOC;
      return;

    case 5:
      if (type)
        HALT_UNALLOC;
      fcvtsd (cpu);
      return;

    case 8: /* FRINTN etc.  */
    case 9:
    case 10:
    case 11:
    case 12:
    case 14:
    case 15:
      do_FRINT (cpu);
      return;

    case 7:
      if (INSTR (22, 22))
        do_FCVT_double_to_half (cpu);
      else
        do_FCVT_single_to_half (cpu);
      return;

    case 13:
      HALT_NYI;

    default:
      HALT_UNALLOC;
    }
}
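/* A sample decode for the 1-source table above: type == 3 with
   opcode == 4 selects the half-to-single FCVT, while type == 1 with
   opcode == 3 selects fsqrtd.  */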
/* 32 bit signed int to float.  */
static void
scvtf32 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float
    (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
}

/* 64 bit signed int to float.  */
static void
scvtf (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_float
    (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
}

/* 32 bit signed int to double.  */
static void
scvtd32 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double
    (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
}

/* 64 bit signed int to double.  */
static void
scvtd (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned sd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_FP_double
    (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
}

static const float FLOAT_INT_MAX = (float) INT_MAX;
static const float FLOAT_INT_MIN = (float) INT_MIN;
static const double DOUBLE_INT_MAX = (double) INT_MAX;
static const double DOUBLE_INT_MIN = (double) INT_MIN;
static const float FLOAT_LONG_MAX = (float) LONG_MAX;
static const float FLOAT_LONG_MIN = (float) LONG_MIN;
static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
static const double DOUBLE_LONG_MIN = (double) LONG_MIN;

#define UINT_MIN 0
#define ULONG_MIN 0
static const float FLOAT_UINT_MAX = (float) UINT_MAX;
static const float FLOAT_UINT_MIN = (float) UINT_MIN;
static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;

/* Check for FP exception conditions:
     NaN raises IO
     Infinity raises IO
     Out of Range raises IO and IX and saturates value
     Denormal raises ID and IX and sets to zero.  */
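/* For example, converting f = 3.0e9f to a signed 32-bit integer via
   FCVTZS falls in the Out of Range case: IO and IX are raised and
   the result saturates to INT_MAX (0x7fffffff).  */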
#define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE)                      \
  do                                                                  \
    {                                                                 \
      switch (fpclassify (F))                                         \
        {                                                             \
        case FP_INFINITE:                                             \
        case FP_NAN:                                                  \
          aarch64_set_FPSR (cpu, IO);                                 \
          if (signbit (F))                                            \
            VALUE = ITYPE##_MIN;                                      \
          else                                                        \
            VALUE = ITYPE##_MAX;                                      \
          break;                                                      \
                                                                      \
        case FP_NORMAL:                                               \
          if (F >= FTYPE##_##ITYPE##_MAX)                             \
            {                                                         \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);          \
              VALUE = ITYPE##_MAX;                                    \
            }                                                         \
          else if (F <= FTYPE##_##ITYPE##_MIN)                        \
            {                                                         \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);          \
              VALUE = ITYPE##_MIN;                                    \
            }                                                         \
          break;                                                      \
                                                                      \
        case FP_SUBNORMAL:                                            \
          aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID);         \
          VALUE = 0;                                                  \
          break;                                                      \
                                                                      \
        default:                                                      \
        case FP_ZERO:                                                 \
          VALUE = 0;                                                  \
          break;                                                      \
        }                                                             \
    }                                                                 \
  while (0)

/* 32 bit convert float to signed int truncate towards zero.  */
static void
fcvtszs32 (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* The C cast truncates towards zero, which is the rounding
     FCVTZS requires.  */
  float f = aarch64_get_FP_float (cpu, sn);
  int32_t value = (int32_t) f;

  RAISE_EXCEPTIONS (f, value, FLOAT, INT);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Avoid sign extension to 64 bit.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
}

/* 64 bit convert float to signed int truncate towards zero.  */
static void
fcvtszs (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  float f = aarch64_get_FP_float (cpu, sn);
  int64_t value = (int64_t) f;

  RAISE_EXCEPTIONS (f, value, FLOAT, LONG);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
}

/* 32 bit convert double to signed int truncate towards zero.  */
static void
fcvtszd32 (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* The C cast truncates towards zero, which is the rounding
     FCVTZS requires.  */
  double d = aarch64_get_FP_double (cpu, sn);
  int32_t value = (int32_t) d;

  RAISE_EXCEPTIONS (d, value, DOUBLE, INT);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Avoid sign extension to 64 bit.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
}

/* 64 bit convert double to signed int truncate towards zero.  */
static void
fcvtszd (sim_cpu *cpu)
{
  unsigned sn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* The C cast truncates towards zero, which is the rounding
     FCVTZS requires.  */
  double d = aarch64_get_FP_double (cpu, sn);
  int64_t value;

  value = (int64_t) d;

  RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
}
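/* Note on the conversions above: when the input is out of range for
   the destination type the initial C cast is undefined behaviour,
   and RAISE_EXCEPTIONS then overwrites VALUE with the saturated
   result.  Hosts that trap on such conversions would need the range
   check to happen first.  */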
static void
do_fcvtzu (sim_cpu *cpu)
{
  /* instr[31] = size: 32-bit (0), 64-bit (1)
     instr[30,23] = 00111100
     instr[22] = type: single (0)/ double (1)
     instr[21] = enable (0)/disable(1) precision
     instr[20,16] = 11001
     instr[15,10] = precision
     instr[9,5] = Rs
     instr[4,0] = Rd.  */

  unsigned rs = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (30, 23, 0x3C);
  NYI_assert (20, 16, 0x19);

  if (INSTR (21, 21) != 1)
    /* Convert to fixed point.  */
    HALT_NYI;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (31, 31))
    {
      /* Convert to unsigned 64-bit integer.  */
      if (INSTR (22, 22))
        {
          double d = aarch64_get_FP_double (cpu, rs);
          uint64_t value = (uint64_t) d;

          /* Do not raise an exception if we have reached ULONG_MAX.  */
          if (value != (1ULL << 63))
            RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);

          aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
        }
      else
        {
          float f = aarch64_get_FP_float (cpu, rs);
          uint64_t value = (uint64_t) f;

          /* Do not raise an exception if we have reached ULONG_MAX.  */
          if (value != (1ULL << 63))
            RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);

          aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
        }
    }
  else
    {
      uint32_t value;

      /* Convert to unsigned 32-bit integer.  */
      if (INSTR (22, 22))
        {
          double d = aarch64_get_FP_double (cpu, rs);

          value = (uint32_t) d;
          /* Do not raise an exception if we have reached UINT_MAX.  */
          if (value != (1UL << 31))
            RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
        }
      else
        {
          float f = aarch64_get_FP_float (cpu, rs);

          value = (uint32_t) f;
          /* Do not raise an exception if we have reached UINT_MAX.  */
          if (value != (1UL << 31))
            RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
        }

      aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
    }
}
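/* For the widest inputs the conversions below are inexact: UCVTF of
   the 64-bit value 0xFFFFFFFFFFFFFFFF yields 18446744073709551616.0
   (2^64) as a double, because the nearest representable value lies
   one unit above the exact input.  */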
static void
do_UCVTF (sim_cpu *cpu)
{
  /* instr[31] = size: 32-bit (0), 64-bit (1)
     instr[30,23] = 001 1110 0
     instr[22] = type: single (0)/ double (1)
     instr[21] = enable (0)/disable(1) precision
     instr[20,16] = 0 0011
     instr[15,10] = precision
     instr[9,5] = Rs
     instr[4,0] = Rd.  */

  unsigned rs = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (30, 23, 0x3C);
  NYI_assert (20, 16, 0x03);

  if (INSTR (21, 21) != 1)
    HALT_NYI;

  /* FIXME: Add exception raising.  */
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (31, 31))
    {
      uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);

      if (INSTR (22, 22))
        aarch64_set_FP_double (cpu, rd, (double) value);
      else
        aarch64_set_FP_float (cpu, rd, (float) value);
    }
  else
    {
      uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);

      if (INSTR (22, 22))
        aarch64_set_FP_double (cpu, rd, (double) value);
      else
        aarch64_set_FP_float (cpu, rd, (float) value);
    }
}

static void
float_vector_move (sim_cpu *cpu)
{
  /* instr[31,17] == 100 1111 0101 0111
     instr[16] ==> direction 0=> to GR, 1=> from GR
     instr[15,10] ==> must be zero (other values UNALLOC)
     instr[9,5] ==> source
     instr[4,0] ==> dest.  */

  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  NYI_assert (31, 17, 0x4F57);

  if (INSTR (15, 10) != 0)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (16, 16))
    aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
  else
    aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
}

static void
dexSimpleFPIntegerConvert (sim_cpu *cpu)
{
  /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30] = 0
     instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
     instr[28,25] = 1111
     instr[24] = 0
     instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
     instr[21] = 1
     instr[20,19] = rmode
     instr[18,16] = opcode
     instr[15,10] = 10 0000  */

  uint32_t rmode_opcode;
  uint32_t size_type;
  uint32_t type;
  uint32_t size;
  uint32_t S;

  if (INSTR (31, 17) == 0x4F57)
    {
      float_vector_move (cpu);
      return;
    }

  size = INSTR (31, 31);
  S = INSTR (29, 29);
  if (S != 0)
    HALT_UNALLOC;

  type = INSTR (23, 22);
  if (type > 1)
    HALT_UNALLOC;

  rmode_opcode = INSTR (20, 16);
  size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d.  */

  switch (rmode_opcode)
    {
    case 2: /* SCVTF.  */
      switch (size_type)
        {
        case 0: scvtf32 (cpu); return;
        case 1: scvtd32 (cpu); return;
        case 2: scvtf (cpu); return;
        case 3: scvtd (cpu); return;
        }

    case 6: /* FMOV GR, Vec.  */
      switch (size_type)
        {
        case 0: gfmovs (cpu); return;
        case 3: gfmovd (cpu); return;
        default: HALT_UNALLOC;
        }

    case 7: /* FMOV vec, GR.  */
      switch (size_type)
        {
        case 0: fgmovs (cpu); return;
        case 3: fgmovd (cpu); return;
        default: HALT_UNALLOC;
        }

    case 24: /* FCVTZS.  */
      switch (size_type)
        {
        case 0: fcvtszs32 (cpu); return;
        case 1: fcvtszd32 (cpu); return;
        case 2: fcvtszs (cpu); return;
        case 3: fcvtszd (cpu); return;
        }

    case 25: do_fcvtzu (cpu); return;
    case 3: do_UCVTF (cpu); return;

    case 0: /* FCVTNS.  */
    case 1: /* FCVTNU.  */
    case 4: /* FCVTAS.  */
    case 5: /* FCVTAU.  */
    case 8: /* FCVTPS.  */
    case 9: /* FCVTPU.  */
    case 16: /* FCVTMS.  */
    case 17: /* FCVTMU.  */
    default:
      HALT_NYI;
    }
}

static void
set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
{
  uint32_t flags;

  /* FIXME: Add exception raising.  */
  if (isnan (fvalue1) || isnan (fvalue2))
    flags = C|V;
  else if (isinf (fvalue1) && isinf (fvalue2))
    {
      /* Subtracting two infinities may give a NaN.  We only need to compare
         the signs, which we can get from isinf.  */
      int result = isinf (fvalue1) - isinf (fvalue2);

      if (result == 0)
        flags = Z|C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }
  else
    {
      float result = fvalue1 - fvalue2;

      if (result == 0.0)
        flags = Z|C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }

  aarch64_set_CPSR (cpu, flags);
}
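/* The isinf trick above (and in the double variant below) assumes
   the glibc-style isinf that returns -1 for -inf and +1 for +inf;
   ISO C only promises a nonzero result, so on other hosts the
   inf-versus-inf comparison would need explicit signbit checks.  */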
/* Float compare -- Invalid Operation exception only on signaling NaNs.  */
static void
fcmps (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);

  float fvalue1 = aarch64_get_FP_float (cpu, sn);
  float fvalue2 = aarch64_get_FP_float (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, fvalue2);
}

/* Float compare to zero -- Invalid Operation exception
   only on signaling NaNs.  */
static void
fcmpzs (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9, 5);
  float fvalue1 = aarch64_get_FP_float (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, 0.0f);
}

/* Float compare -- Invalid Operation exception on all NaNs.  */
static void
fcmpes (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);

  float fvalue1 = aarch64_get_FP_float (cpu, sn);
  float fvalue2 = aarch64_get_FP_float (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, fvalue2);
}

/* Float compare to zero -- Invalid Operation exception on all NaNs.  */
static void
fcmpzes (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9, 5);
  float fvalue1 = aarch64_get_FP_float (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_float_compare (cpu, fvalue1, 0.0f);
}

static void
set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
{
  uint32_t flags;

  /* FIXME: Add exception raising.  */
  if (isnan (dval1) || isnan (dval2))
    flags = C|V;
  else if (isinf (dval1) && isinf (dval2))
    {
      /* Subtracting two infinities may give a NaN.  We only need to compare
         the signs, which we can get from isinf.  */
      int result = isinf (dval1) - isinf (dval2);

      if (result == 0)
        flags = Z|C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }
  else
    {
      double result = dval1 - dval2;

      if (result == 0.0)
        flags = Z|C;
      else if (result < 0)
        flags = N;
      else /* (result > 0).  */
        flags = C;
    }

  aarch64_set_CPSR (cpu, flags);
}

/* Double compare -- Invalid Operation exception only on signaling NaNs.  */
static void
fcmpd (sim_cpu *cpu)
{
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);

  double dvalue1 = aarch64_get_FP_double (cpu, sn);
  double dvalue2 = aarch64_get_FP_double (cpu, sm);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_double_compare (cpu, dvalue1, dvalue2);
}

/* Double compare to zero -- Invalid Operation exception
   only on signaling NaNs.  */
static void
fcmpzd (sim_cpu *cpu)
{
  unsigned sn = INSTR ( 9, 5);
  double dvalue1 = aarch64_get_FP_double (cpu, sn);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  set_flags_for_double_compare (cpu, dvalue1, 0.0);
}
*/ 8774 static void 8775 fcmped (sim_cpu *cpu) 8776 { 8777 unsigned sm = INSTR (20, 16); 8778 unsigned sn = INSTR ( 9, 5); 8779 8780 double dvalue1 = aarch64_get_FP_double (cpu, sn); 8781 double dvalue2 = aarch64_get_FP_double (cpu, sm); 8782 8783 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8784 set_flags_for_double_compare (cpu, dvalue1, dvalue2); 8785 } 8786 8787 /* Double compare to zero -- Invalid Operation exception on all NaNs. */ 8788 static void 8789 fcmpzed (sim_cpu *cpu) 8790 { 8791 unsigned sn = INSTR ( 9, 5); 8792 double dvalue1 = aarch64_get_FP_double (cpu, sn); 8793 8794 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8795 set_flags_for_double_compare (cpu, dvalue1, 0.0); 8796 } 8797 8798 static void 8799 dexSimpleFPCompare (sim_cpu *cpu) 8800 { 8801 /* assert instr[28,25] == 1111 8802 instr[30:24:21:13,10] = 0011000 8803 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC 8804 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC 8805 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC 8806 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC 8807 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE, 8808 01000 ==> FCMPZ, 11000 ==> FCMPEZ, 8809 ow ==> UNALLOC */ 8810 uint32_t dispatch; 8811 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); 8812 uint32_t type = INSTR (23, 22); 8813 uint32_t op = INSTR (15, 14); 8814 uint32_t op2_2_0 = INSTR (2, 0); 8815 8816 if (op2_2_0 != 0) 8817 HALT_UNALLOC; 8818 8819 if (M_S != 0) 8820 HALT_UNALLOC; 8821 8822 if (type > 1) 8823 HALT_UNALLOC; 8824 8825 if (op != 0) 8826 HALT_UNALLOC; 8827 8828 /* dispatch on type and top 2 bits of opcode. */ 8829 dispatch = (type << 2) | INSTR (4, 3); 8830 8831 switch (dispatch) 8832 { 8833 case 0: fcmps (cpu); return; 8834 case 1: fcmpzs (cpu); return; 8835 case 2: fcmpes (cpu); return; 8836 case 3: fcmpzes (cpu); return; 8837 case 4: fcmpd (cpu); return; 8838 case 5: fcmpzd (cpu); return; 8839 case 6: fcmped (cpu); return; 8840 case 7: fcmpzed (cpu); return; 8841 } 8842 } 8843 8844 static void 8845 do_scalar_FADDP (sim_cpu *cpu) 8846 { 8847 /* instr [31,23] = 0111 1110 0 8848 instr [22] = single(0)/double(1) 8849 instr [21,10] = 11 0000 1101 10 8850 instr [9,5] = Fn 8851 instr [4,0] = Fd. */ 8852 8853 unsigned Fn = INSTR (9, 5); 8854 unsigned Fd = INSTR (4, 0); 8855 8856 NYI_assert (31, 23, 0x0FC); 8857 NYI_assert (21, 10, 0xC36); 8858 8859 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8860 if (INSTR (22, 22)) 8861 { 8862 double val1 = aarch64_get_vec_double (cpu, Fn, 0); 8863 double val2 = aarch64_get_vec_double (cpu, Fn, 1); 8864 8865 aarch64_set_FP_double (cpu, Fd, val1 + val2); 8866 } 8867 else 8868 { 8869 float val1 = aarch64_get_vec_float (cpu, Fn, 0); 8870 float val2 = aarch64_get_vec_float (cpu, Fn, 1); 8871 8872 aarch64_set_FP_float (cpu, Fd, val1 + val2); 8873 } 8874 } 8875 8876 /* Floating point absolute difference. */ 8877 8878 static void 8879 do_scalar_FABD (sim_cpu *cpu) 8880 { 8881 /* instr [31,23] = 0111 1110 1 8882 instr [22] = float(0)/double(1) 8883 instr [21] = 1 8884 instr [20,16] = Rm 8885 instr [15,10] = 1101 01 8886 instr [9, 5] = Rn 8887 instr [4, 0] = Rd. 
*/ 8888 8889 unsigned rm = INSTR (20, 16); 8890 unsigned rn = INSTR (9, 5); 8891 unsigned rd = INSTR (4, 0); 8892 8893 NYI_assert (31, 23, 0x0FD); 8894 NYI_assert (21, 21, 1); 8895 NYI_assert (15, 10, 0x35); 8896 8897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8898 if (INSTR (22, 22)) 8899 aarch64_set_FP_double (cpu, rd, 8900 fabs (aarch64_get_FP_double (cpu, rn) 8901 - aarch64_get_FP_double (cpu, rm))); 8902 else 8903 aarch64_set_FP_float (cpu, rd, 8904 fabsf (aarch64_get_FP_float (cpu, rn) 8905 - aarch64_get_FP_float (cpu, rm))); 8906 } 8907 8908 static void 8909 do_scalar_CMGT (sim_cpu *cpu) 8910 { 8911 /* instr [31,21] = 0101 1110 111 8912 instr [20,16] = Rm 8913 instr [15,10] = 00 1101 8914 instr [9, 5] = Rn 8915 instr [4, 0] = Rd. */ 8916 8917 unsigned rm = INSTR (20, 16); 8918 unsigned rn = INSTR (9, 5); 8919 unsigned rd = INSTR (4, 0); 8920 8921 NYI_assert (31, 21, 0x2F7); 8922 NYI_assert (15, 10, 0x0D); 8923 8924 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8925 aarch64_set_vec_u64 (cpu, rd, 0, 8926 aarch64_get_vec_u64 (cpu, rn, 0) > 8927 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L); 8928 } 8929 8930 static void 8931 do_scalar_USHR (sim_cpu *cpu) 8932 { 8933 /* instr [31,23] = 0111 1111 0 8934 instr [22,16] = shift amount 8935 instr [15,10] = 0000 01 8936 instr [9, 5] = Rn 8937 instr [4, 0] = Rd. */ 8938 8939 unsigned amount = 128 - INSTR (22, 16); 8940 unsigned rn = INSTR (9, 5); 8941 unsigned rd = INSTR (4, 0); 8942 8943 NYI_assert (31, 23, 0x0FE); 8944 NYI_assert (15, 10, 0x01); 8945 8946 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8947 aarch64_set_vec_u64 (cpu, rd, 0, 8948 aarch64_get_vec_u64 (cpu, rn, 0) >> amount); 8949 } 8950 8951 static void 8952 do_scalar_SSHL (sim_cpu *cpu) 8953 { 8954 /* instr [31,21] = 0101 1110 111 8955 instr [20,16] = Rm 8956 instr [15,10] = 0100 01 8957 instr [9, 5] = Rn 8958 instr [4, 0] = Rd. */ 8959 8960 unsigned rm = INSTR (20, 16); 8961 unsigned rn = INSTR (9, 5); 8962 unsigned rd = INSTR (4, 0); 8963 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0); 8964 8965 NYI_assert (31, 21, 0x2F7); 8966 NYI_assert (15, 10, 0x11); 8967 8968 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8969 if (shift >= 0) 8970 aarch64_set_vec_s64 (cpu, rd, 0, 8971 aarch64_get_vec_s64 (cpu, rn, 0) << shift); 8972 else 8973 aarch64_set_vec_s64 (cpu, rd, 0, 8974 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift); 8975 } 8976 8977 /* Floating point scalar compare greater than or equal to 0. */ 8978 static void 8979 do_scalar_FCMGE_zero (sim_cpu *cpu) 8980 { 8981 /* instr [31,23] = 0111 1110 1 8982 instr [22,22] = size 8983 instr [21,16] = 1000 00 8984 instr [15,10] = 1100 10 8985 instr [9, 5] = Rn 8986 instr [4, 0] = Rd. */ 8987 8988 unsigned size = INSTR (22, 22); 8989 unsigned rn = INSTR (9, 5); 8990 unsigned rd = INSTR (4, 0); 8991 8992 NYI_assert (31, 23, 0x0FD); 8993 NYI_assert (21, 16, 0x20); 8994 NYI_assert (15, 10, 0x32); 8995 8996 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8997 if (size) 8998 aarch64_set_vec_u64 (cpu, rd, 0, 8999 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0); 9000 else 9001 aarch64_set_vec_u32 (cpu, rd, 0, 9002 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0); 9003 } 9004 9005 /* Floating point scalar compare less than or equal to 0. */ 9006 static void 9007 do_scalar_FCMLE_zero (sim_cpu *cpu) 9008 { 9009 /* instr [31,23] = 0111 1110 1 9010 instr [22,22] = size 9011 instr [21,16] = 1000 00 9012 instr [15,10] = 1101 10 9013 instr [9, 5] = Rn 9014 instr [4, 0] = Rd. 
*/ 9015 9016 unsigned size = INSTR (22, 22); 9017 unsigned rn = INSTR (9, 5); 9018 unsigned rd = INSTR (4, 0); 9019 9020 NYI_assert (31, 23, 0x0FD); 9021 NYI_assert (21, 16, 0x20); 9022 NYI_assert (15, 10, 0x36); 9023 9024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9025 if (size) 9026 aarch64_set_vec_u64 (cpu, rd, 0, 9027 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0); 9028 else 9029 aarch64_set_vec_u32 (cpu, rd, 0, 9030 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0); 9031 } 9032 9033 /* Floating point scalar compare greater than 0. */ 9034 static void 9035 do_scalar_FCMGT_zero (sim_cpu *cpu) 9036 { 9037 /* instr [31,23] = 0101 1110 1 9038 instr [22,22] = size 9039 instr [21,16] = 1000 00 9040 instr [15,10] = 1100 10 9041 instr [9, 5] = Rn 9042 instr [4, 0] = Rd. */ 9043 9044 unsigned size = INSTR (22, 22); 9045 unsigned rn = INSTR (9, 5); 9046 unsigned rd = INSTR (4, 0); 9047 9048 NYI_assert (31, 23, 0x0BD); 9049 NYI_assert (21, 16, 0x20); 9050 NYI_assert (15, 10, 0x32); 9051 9052 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9053 if (size) 9054 aarch64_set_vec_u64 (cpu, rd, 0, 9055 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0); 9056 else 9057 aarch64_set_vec_u32 (cpu, rd, 0, 9058 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0); 9059 } 9060 9061 /* Floating point scalar compare equal to 0. */ 9062 static void 9063 do_scalar_FCMEQ_zero (sim_cpu *cpu) 9064 { 9065 /* instr [31,23] = 0101 1110 1 9066 instr [22,22] = size 9067 instr [21,16] = 1000 00 9068 instr [15,10] = 1101 10 9069 instr [9, 5] = Rn 9070 instr [4, 0] = Rd. */ 9071 9072 unsigned size = INSTR (22, 22); 9073 unsigned rn = INSTR (9, 5); 9074 unsigned rd = INSTR (4, 0); 9075 9076 NYI_assert (31, 23, 0x0BD); 9077 NYI_assert (21, 16, 0x20); 9078 NYI_assert (15, 10, 0x36); 9079 9080 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9081 if (size) 9082 aarch64_set_vec_u64 (cpu, rd, 0, 9083 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0); 9084 else 9085 aarch64_set_vec_u32 (cpu, rd, 0, 9086 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0); 9087 } 9088 9089 /* Floating point scalar compare less than 0. */ 9090 static void 9091 do_scalar_FCMLT_zero (sim_cpu *cpu) 9092 { 9093 /* instr [31,23] = 0101 1110 1 9094 instr [22,22] = size 9095 instr [21,16] = 1000 00 9096 instr [15,10] = 1110 10 9097 instr [9, 5] = Rn 9098 instr [4, 0] = Rd. */ 9099 9100 unsigned size = INSTR (22, 22); 9101 unsigned rn = INSTR (9, 5); 9102 unsigned rd = INSTR (4, 0); 9103 9104 NYI_assert (31, 23, 0x0BD); 9105 NYI_assert (21, 16, 0x20); 9106 NYI_assert (15, 10, 0x3A); 9107 9108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9109 if (size) 9110 aarch64_set_vec_u64 (cpu, rd, 0, 9111 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0); 9112 else 9113 aarch64_set_vec_u32 (cpu, rd, 0, 9114 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0); 9115 } 9116 9117 static void 9118 do_scalar_shift (sim_cpu *cpu) 9119 { 9120 /* instr [31,23] = 0101 1111 0 9121 instr [22,16] = shift amount 9122 instr [15,10] = 0101 01 [SHL] 9123 instr [15,10] = 0000 01 [SSHR] 9124 instr [9, 5] = Rn 9125 instr [4, 0] = Rd. 
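     The shift amounts are recovered from the combined immh:immb field
     in instr[22,16] below: e.g. SSHR D0, D1, #16 is encoded as
     128 - 16 = 0x70, while SHL D0, D1, #3 is encoded as 64 + 3 = 0x43.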
*/ 9126 9127 unsigned rn = INSTR (9, 5); 9128 unsigned rd = INSTR (4, 0); 9129 unsigned amount; 9130 9131 NYI_assert (31, 23, 0x0BE); 9132 9133 if (INSTR (22, 22) == 0) 9134 HALT_UNALLOC; 9135 9136 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9137 switch (INSTR (15, 10)) 9138 { 9139 case 0x01: /* SSHR */ 9140 amount = 128 - INSTR (22, 16); 9141 aarch64_set_vec_s64 (cpu, rd, 0, 9142 aarch64_get_vec_s64 (cpu, rn, 0) >> amount); 9143 return; 9144 case 0x15: /* SHL */ 9145 amount = INSTR (22, 16) - 64; 9146 aarch64_set_vec_u64 (cpu, rd, 0, 9147 aarch64_get_vec_u64 (cpu, rn, 0) << amount); 9148 return; 9149 default: 9150 HALT_NYI; 9151 } 9152 } 9153 9154 /* FCMEQ FCMGT FCMGE. */ 9155 static void 9156 do_scalar_FCM (sim_cpu *cpu) 9157 { 9158 /* instr [31,30] = 01 9159 instr [29] = U 9160 instr [28,24] = 1 1110 9161 instr [23] = E 9162 instr [22] = size 9163 instr [21] = 1 9164 instr [20,16] = Rm 9165 instr [15,12] = 1110 9166 instr [11] = AC 9167 instr [10] = 1 9168 instr [9, 5] = Rn 9169 instr [4, 0] = Rd. */ 9170 9171 unsigned rm = INSTR (20, 16); 9172 unsigned rn = INSTR (9, 5); 9173 unsigned rd = INSTR (4, 0); 9174 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11); 9175 unsigned result; 9176 float val1; 9177 float val2; 9178 9179 NYI_assert (31, 30, 1); 9180 NYI_assert (28, 24, 0x1E); 9181 NYI_assert (21, 21, 1); 9182 NYI_assert (15, 12, 0xE); 9183 NYI_assert (10, 10, 1); 9184 9185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9186 if (INSTR (22, 22)) 9187 { 9188 double val1 = aarch64_get_FP_double (cpu, rn); 9189 double val2 = aarch64_get_FP_double (cpu, rm); 9190 9191 switch (EUac) 9192 { 9193 case 0: /* 000 */ 9194 result = val1 == val2; 9195 break; 9196 9197 case 3: /* 011 */ 9198 val1 = fabs (val1); 9199 val2 = fabs (val2); 9200 /* Fall through. */ 9201 case 2: /* 010 */ 9202 result = val1 >= val2; 9203 break; 9204 9205 case 7: /* 111 */ 9206 val1 = fabs (val1); 9207 val2 = fabs (val2); 9208 /* Fall through. */ 9209 case 6: /* 110 */ 9210 result = val1 > val2; 9211 break; 9212 9213 default: 9214 HALT_UNALLOC; 9215 } 9216 9217 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); 9218 return; 9219 } 9220 9221 val1 = aarch64_get_FP_float (cpu, rn); 9222 val2 = aarch64_get_FP_float (cpu, rm); 9223 9224 switch (EUac) 9225 { 9226 case 0: /* 000 */ 9227 result = val1 == val2; 9228 break; 9229 9230 case 3: /* 011 */ 9231 val1 = fabsf (val1); 9232 val2 = fabsf (val2); 9233 /* Fall through. */ 9234 case 2: /* 010 */ 9235 result = val1 >= val2; 9236 break; 9237 9238 case 7: /* 111 */ 9239 val1 = fabsf (val1); 9240 val2 = fabsf (val2); 9241 /* Fall through. */ 9242 case 6: /* 110 */ 9243 result = val1 > val2; 9244 break; 9245 9246 default: 9247 HALT_UNALLOC; 9248 } 9249 9250 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); 9251 } 9252 9253 /* An alias of DUP. */ 9254 static void 9255 do_scalar_MOV (sim_cpu *cpu) 9256 { 9257 /* instr [31,21] = 0101 1110 000 9258 instr [20,16] = imm5 9259 instr [15,10] = 0000 01 9260 instr [9, 5] = Rn 9261 instr [4, 0] = Rd. */ 9262 9263 unsigned rn = INSTR (9, 5); 9264 unsigned rd = INSTR (4, 0); 9265 unsigned index; 9266 9267 NYI_assert (31, 21, 0x2F0); 9268 NYI_assert (15, 10, 0x01); 9269 9270 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9271 if (INSTR (16, 16)) 9272 { 9273 /* 8-bit. */ 9274 index = INSTR (20, 17); 9275 aarch64_set_vec_u8 9276 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index)); 9277 } 9278 else if (INSTR (17, 17)) 9279 { 9280 /* 16-bit. 
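(The lowest set bit of imm5, instr[20,16], selects the
         element size; the bits above it give the element index, so
         imm5 = 01010 selects 16-bit element 2.)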
*/ 9281 index = INSTR (20, 18); 9282 aarch64_set_vec_u16 9283 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index)); 9284 } 9285 else if (INSTR (18, 18)) 9286 { 9287 /* 32-bit. */ 9288 index = INSTR (20, 19); 9289 aarch64_set_vec_u32 9290 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index)); 9291 } 9292 else if (INSTR (19, 19)) 9293 { 9294 /* 64-bit. */ 9295 index = INSTR (20, 20); 9296 aarch64_set_vec_u64 9297 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index)); 9298 } 9299 else 9300 HALT_UNALLOC; 9301 } 9302 9303 static void 9304 do_scalar_NEG (sim_cpu *cpu) 9305 { 9306 /* instr [31,10] = 0111 1110 1110 0000 1011 10 9307 instr [9, 5] = Rn 9308 instr [4, 0] = Rd. */ 9309 9310 unsigned rn = INSTR (9, 5); 9311 unsigned rd = INSTR (4, 0); 9312 9313 NYI_assert (31, 10, 0x1FB82E); 9314 9315 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9316 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0)); 9317 } 9318 9319 static void 9320 do_scalar_USHL (sim_cpu *cpu) 9321 { 9322 /* instr [31,21] = 0111 1110 111 9323 instr [20,16] = Rm 9324 instr [15,10] = 0100 01 9325 instr [9, 5] = Rn 9326 instr [4, 0] = Rd. */ 9327 9328 unsigned rm = INSTR (20, 16); 9329 unsigned rn = INSTR (9, 5); 9330 unsigned rd = INSTR (4, 0); 9331 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0); 9332 9333 NYI_assert (31, 21, 0x3F7); 9334 NYI_assert (15, 10, 0x11); 9335 9336 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9337 if (shift >= 0) 9338 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift); 9339 else 9340 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift); 9341 } 9342 9343 static void 9344 do_double_add (sim_cpu *cpu) 9345 { 9346 /* instr [31,21] = 0101 1110 111 9347 instr [20,16] = Fn 9348 instr [15,10] = 1000 01 9349 instr [9,5] = Fm 9350 instr [4,0] = Fd. */ 9351 unsigned Fd; 9352 unsigned Fm; 9353 unsigned Fn; 9354 double val1; 9355 double val2; 9356 9357 NYI_assert (31, 21, 0x2F7); 9358 NYI_assert (15, 10, 0x21); 9359 9360 Fd = INSTR (4, 0); 9361 Fm = INSTR (9, 5); 9362 Fn = INSTR (20, 16); 9363 9364 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9365 val1 = aarch64_get_FP_double (cpu, Fm); 9366 val2 = aarch64_get_FP_double (cpu, Fn); 9367 9368 aarch64_set_FP_double (cpu, Fd, val1 + val2); 9369 } 9370 9371 static void 9372 do_scalar_UCVTF (sim_cpu *cpu) 9373 { 9374 /* instr [31,23] = 0111 1110 0 9375 instr [22] = single(0)/double(1) 9376 instr [21,10] = 10 0001 1101 10 9377 instr [9,5] = rn 9378 instr [4,0] = rd. */ 9379 9380 unsigned rn = INSTR (9, 5); 9381 unsigned rd = INSTR (4, 0); 9382 9383 NYI_assert (31, 23, 0x0FC); 9384 NYI_assert (21, 10, 0x876); 9385 9386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9387 if (INSTR (22, 22)) 9388 { 9389 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0); 9390 9391 aarch64_set_vec_double (cpu, rd, 0, (double) val); 9392 } 9393 else 9394 { 9395 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0); 9396 9397 aarch64_set_vec_float (cpu, rd, 0, (float) val); 9398 } 9399 } 9400 9401 static void 9402 do_scalar_vec (sim_cpu *cpu) 9403 { 9404 /* instr [30] = 1. */ 9405 /* instr [28,25] = 1111. 
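   The switch below keys on instr [31,23]: 0x0BC/0x0BD/0x0BE are the
   0101 1110 0, 0101 1110 1 and 0101 1111 0 scalar groups, while
   0x0FC/0x0FD/0x0FE are the corresponding 0111 1110 0, 0111 1110 1
   and 0111 1111 0 groups.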
*/ 9406 switch (INSTR (31, 23)) 9407 { 9408 case 0xBC: 9409 switch (INSTR (15, 10)) 9410 { 9411 case 0x01: do_scalar_MOV (cpu); return; 9412 case 0x39: do_scalar_FCM (cpu); return; 9413 case 0x3B: do_scalar_FCM (cpu); return; 9414 } 9415 break; 9416 9417 case 0xBE: do_scalar_shift (cpu); return; 9418 9419 case 0xFC: 9420 switch (INSTR (15, 10)) 9421 { 9422 case 0x36: 9423 switch (INSTR (21, 16)) 9424 { 9425 case 0x30: do_scalar_FADDP (cpu); return; 9426 case 0x21: do_scalar_UCVTF (cpu); return; 9427 } 9428 HALT_NYI; 9429 case 0x39: do_scalar_FCM (cpu); return; 9430 case 0x3B: do_scalar_FCM (cpu); return; 9431 } 9432 break; 9433 9434 case 0xFD: 9435 switch (INSTR (15, 10)) 9436 { 9437 case 0x0D: do_scalar_CMGT (cpu); return; 9438 case 0x11: do_scalar_USHL (cpu); return; 9439 case 0x2E: do_scalar_NEG (cpu); return; 9440 case 0x32: do_scalar_FCMGE_zero (cpu); return; 9441 case 0x35: do_scalar_FABD (cpu); return; 9442 case 0x36: do_scalar_FCMLE_zero (cpu); return; 9443 case 0x39: do_scalar_FCM (cpu); return; 9444 case 0x3B: do_scalar_FCM (cpu); return; 9445 default: 9446 HALT_NYI; 9447 } 9448 9449 case 0xFE: do_scalar_USHR (cpu); return; 9450 9451 case 0xBD: 9452 switch (INSTR (15, 10)) 9453 { 9454 case 0x21: do_double_add (cpu); return; 9455 case 0x11: do_scalar_SSHL (cpu); return; 9456 case 0x32: do_scalar_FCMGT_zero (cpu); return; 9457 case 0x36: do_scalar_FCMEQ_zero (cpu); return; 9458 case 0x3A: do_scalar_FCMLT_zero (cpu); return; 9459 default: 9460 HALT_NYI; 9461 } 9462 9463 default: 9464 HALT_NYI; 9465 } 9466 } 9467 9468 static void 9469 dexAdvSIMD1 (sim_cpu *cpu) 9470 { 9471 /* instr [28,25] = 1 111. */ 9472 9473 /* We are currently only interested in the basic 9474 scalar fp routines which all have bit 30 = 0. */ 9475 if (INSTR (30, 30)) 9476 do_scalar_vec (cpu); 9477 9478 /* instr[24] is set for FP data processing 3-source and clear for 9479 all other basic scalar fp instruction groups. */ 9480 else if (INSTR (24, 24)) 9481 dexSimpleFPDataProc3Source (cpu); 9482 9483 /* instr[21] is clear for floating <-> fixed conversions and set for 9484 all other basic scalar fp instruction groups. */ 9485 else if (!INSTR (21, 21)) 9486 dexSimpleFPFixedConvert (cpu); 9487 9488 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source 9489 11 ==> cond select, 00 ==> other. */ 9490 else 9491 switch (INSTR (11, 10)) 9492 { 9493 case 1: dexSimpleFPCondCompare (cpu); return; 9494 case 2: dexSimpleFPDataProc2Source (cpu); return; 9495 case 3: dexSimpleFPCondSelect (cpu); return; 9496 9497 default: 9498 /* Now an ordered cascade of tests. 9499 FP immediate has instr [12] == 1. 9500 FP compare has instr [13] == 1. 9501 FP Data Proc 1 Source has instr [14] == 1. 9502 FP floating <--> integer conversions has instr [15] == 0. */ 9503 if (INSTR (12, 12)) 9504 dexSimpleFPImmediate (cpu); 9505 9506 else if (INSTR (13, 13)) 9507 dexSimpleFPCompare (cpu); 9508 9509 else if (INSTR (14, 14)) 9510 dexSimpleFPDataProc1Source (cpu); 9511 9512 else if (!INSTR (15, 15)) 9513 dexSimpleFPIntegerConvert (cpu); 9514 9515 else 9516 /* If we get here then instr[15] == 1 which means UNALLOC. */ 9517 HALT_UNALLOC; 9518 } 9519 } 9520 9521 /* PC relative addressing. */ 9522 9523 static void 9524 pcadr (sim_cpu *cpu) 9525 { 9526 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP 9527 instr[30,29] = immlo 9528 instr[23,5] = immhi. 
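     instr[4,0]   = Rd

     ADR sets Rd to PC plus the signed immediate immhi:immlo; ADRP works
     in 4KB pages, e.g. (a worked illustration) immhi:immlo == 1 gives
     Rd = (PC & ~0xfff) + 0x1000.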
*/
  uint64_t address;
  unsigned rd = INSTR (4, 0);
  uint32_t isPage = INSTR (31, 31);
  /* The immediate is signed; pun it to unsigned via the union.  */
  union { int64_t s64; uint64_t u64; } imm;
  uint64_t offset;

  imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
  offset = imm.u64;
  offset = (offset << 2) | INSTR (30, 29);

  address = aarch64_get_PC (cpu);

  if (isPage)
    {
      offset <<= 12;
      address &= ~0xfff;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
}

/* Specific decode and execute for group Data Processing Immediate.  */

static void
dexPCRelAddressing (sim_cpu *cpu)
{
  /* assert instr[28,24] = 10000.  */
  pcadr (cpu);
}

/* Immediate logical.
   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
   16, 32 or 64 bit sequence pulled out at decode and possibly
   inverting it.

   N.B. the output register (dest) can normally be Xn or SP;
   the exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  The input register can
   never be SP.  */

/* 32 bit and immediate.  */
static void
and32 (sim_cpu *cpu, uint32_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
}

/* 64 bit and immediate.  */
static void
and64 (sim_cpu *cpu, uint64_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
}

/* 32 bit and immediate set flags.  */
static void
ands32 (sim_cpu *cpu, uint32_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t value2 = bimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop32 (cpu, value1 & value2);
}

/* 64 bit and immediate set flags.  */
static void
ands64 (sim_cpu *cpu, uint64_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t value2 = bimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
  set_flags_for_binop64 (cpu, value1 & value2);
}

/* 32 bit exclusive or immediate.  */
static void
eor32 (sim_cpu *cpu, uint32_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
}

/* 64 bit exclusive or immediate.  */
static void
eor64 (sim_cpu *cpu, uint64_t bimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
}

/* 32 bit or immediate.
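The bimm argument arrives pre-expanded
   via the LITable lookup in dexLogicalImmediate: e.g. N = 0, immr = 0,
   imms = 7 denotes eight consecutive ones, i.e. 0xff replicated into
   each 32-bit lane.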
*/ 9649 static void 9650 orr32 (sim_cpu *cpu, uint32_t bimm) 9651 { 9652 unsigned rn = INSTR (9, 5); 9653 unsigned rd = INSTR (4, 0); 9654 9655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9656 aarch64_set_reg_u64 (cpu, rd, SP_OK, 9657 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm); 9658 } 9659 9660 /* 64 bit or immediate. */ 9661 static void 9662 orr64 (sim_cpu *cpu, uint64_t bimm) 9663 { 9664 unsigned rn = INSTR (9, 5); 9665 unsigned rd = INSTR (4, 0); 9666 9667 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9668 aarch64_set_reg_u64 (cpu, rd, SP_OK, 9669 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm); 9670 } 9671 9672 /* Logical shifted register. 9673 These allow an optional LSL, ASR, LSR or ROR to the second source 9674 register with a count up to the register bit count. 9675 N.B register args may not be SP. */ 9676 9677 /* 32 bit AND shifted register. */ 9678 static void 9679 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9680 { 9681 unsigned rm = INSTR (20, 16); 9682 unsigned rn = INSTR (9, 5); 9683 unsigned rd = INSTR (4, 0); 9684 9685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9686 aarch64_set_reg_u64 9687 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9688 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9689 } 9690 9691 /* 64 bit AND shifted register. */ 9692 static void 9693 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9694 { 9695 unsigned rm = INSTR (20, 16); 9696 unsigned rn = INSTR (9, 5); 9697 unsigned rd = INSTR (4, 0); 9698 9699 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9700 aarch64_set_reg_u64 9701 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9702 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9703 } 9704 9705 /* 32 bit AND shifted register setting flags. */ 9706 static void 9707 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9708 { 9709 unsigned rm = INSTR (20, 16); 9710 unsigned rn = INSTR (9, 5); 9711 unsigned rd = INSTR (4, 0); 9712 9713 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 9714 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 9715 shift, count); 9716 9717 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9718 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9719 set_flags_for_binop32 (cpu, value1 & value2); 9720 } 9721 9722 /* 64 bit AND shifted register setting flags. */ 9723 static void 9724 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9725 { 9726 unsigned rm = INSTR (20, 16); 9727 unsigned rn = INSTR (9, 5); 9728 unsigned rd = INSTR (4, 0); 9729 9730 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 9731 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 9732 shift, count); 9733 9734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9735 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9736 set_flags_for_binop64 (cpu, value1 & value2); 9737 } 9738 9739 /* 32 bit BIC shifted register. */ 9740 static void 9741 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9742 { 9743 unsigned rm = INSTR (20, 16); 9744 unsigned rn = INSTR (9, 5); 9745 unsigned rd = INSTR (4, 0); 9746 9747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9748 aarch64_set_reg_u64 9749 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9750 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9751 } 9752 9753 /* 64 bit BIC shifted register. 
*/ 9754 static void 9755 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9756 { 9757 unsigned rm = INSTR (20, 16); 9758 unsigned rn = INSTR (9, 5); 9759 unsigned rd = INSTR (4, 0); 9760 9761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9762 aarch64_set_reg_u64 9763 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9764 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9765 } 9766 9767 /* 32 bit BIC shifted register setting flags. */ 9768 static void 9769 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9770 { 9771 unsigned rm = INSTR (20, 16); 9772 unsigned rn = INSTR (9, 5); 9773 unsigned rd = INSTR (4, 0); 9774 9775 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 9776 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 9777 shift, count); 9778 9779 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9780 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9781 set_flags_for_binop32 (cpu, value1 & value2); 9782 } 9783 9784 /* 64 bit BIC shifted register setting flags. */ 9785 static void 9786 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9787 { 9788 unsigned rm = INSTR (20, 16); 9789 unsigned rn = INSTR (9, 5); 9790 unsigned rd = INSTR (4, 0); 9791 9792 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 9793 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 9794 shift, count); 9795 9796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9797 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9798 set_flags_for_binop64 (cpu, value1 & value2); 9799 } 9800 9801 /* 32 bit EON shifted register. */ 9802 static void 9803 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9804 { 9805 unsigned rm = INSTR (20, 16); 9806 unsigned rn = INSTR (9, 5); 9807 unsigned rd = INSTR (4, 0); 9808 9809 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9810 aarch64_set_reg_u64 9811 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9812 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9813 } 9814 9815 /* 64 bit EON shifted register. */ 9816 static void 9817 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9818 { 9819 unsigned rm = INSTR (20, 16); 9820 unsigned rn = INSTR (9, 5); 9821 unsigned rd = INSTR (4, 0); 9822 9823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9824 aarch64_set_reg_u64 9825 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9826 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9827 } 9828 9829 /* 32 bit EOR shifted register. */ 9830 static void 9831 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9832 { 9833 unsigned rm = INSTR (20, 16); 9834 unsigned rn = INSTR (9, 5); 9835 unsigned rd = INSTR (4, 0); 9836 9837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9838 aarch64_set_reg_u64 9839 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9840 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9841 } 9842 9843 /* 64 bit EOR shifted register. */ 9844 static void 9845 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9846 { 9847 unsigned rm = INSTR (20, 16); 9848 unsigned rn = INSTR (9, 5); 9849 unsigned rd = INSTR (4, 0); 9850 9851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9852 aarch64_set_reg_u64 9853 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9854 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9855 } 9856 9857 /* 32 bit ORR shifted register. 
*/
static void
orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
     | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
}

/* 64 bit ORR shifted register.  */
static void
orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
     | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
}

/* 32 bit ORN shifted register.  */
static void
orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
     | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
}

/* 64 bit ORN shifted register.  */
static void
orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
     | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
}

static void
dexLogicalImmediate (sim_cpu *cpu)
{
  /* assert instr[28,23] = 100100
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
     instr[22] = N : used to construct immediate mask
     instr[21,16] = immr
     instr[15,10] = imms
     instr[9,5] = Rn
     instr[4,0] = Rd  */

  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* uint32_t immr = INSTR (21, 16);  */
  /* uint32_t imms = INSTR (15, 10);  */
  uint32_t index = INSTR (22, 10);
  uint64_t bimm64 = LITable [index];
  uint32_t dispatch = INSTR (30, 29);

  if (~size & N)
    HALT_UNALLOC;

  if (!bimm64)
    HALT_UNALLOC;

  if (size == 0)
    {
      uint32_t bimm = (uint32_t) bimm64;

      switch (dispatch)
        {
        case 0: and32 (cpu, bimm); return;
        case 1: orr32 (cpu, bimm); return;
        case 2: eor32 (cpu, bimm); return;
        case 3: ands32 (cpu, bimm); return;
        }
    }
  else
    {
      switch (dispatch)
        {
        case 0: and64 (cpu, bimm64); return;
        case 1: orr64 (cpu, bimm64); return;
        case 2: eor64 (cpu, bimm64); return;
        case 3: ands64 (cpu, bimm64); return;
        }
    }
  HALT_UNALLOC;
}

/* Immediate move.
   The uimm argument is a 16 bit value to be inserted into the
   target register.  The pos argument locates the 16 bit word in
   the dest register, i.e. it is in {0, 1} for 32 bit and
   {0, 1, 2, 3} for 64 bit.
   N.B. the register arg may not be SP so it should be
   accessed using the setGZRegisterXXX accessors.
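   For instance (a worked illustration, not in the original comments),
   MOVK X0, #0xbeef, LSL #48 reaches movk64 with val == 0xbeef and
   pos == 3, replacing only bits 63:48 of X0.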
*/

/* 32 bit move 16 bit immediate zero remaining shorts.  */
static void
movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
}

/* 64 bit move 16 bit immediate zero remaining shorts.  */
static void
movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
}

/* 32 bit move 16 bit immediate negated.  */
static void
movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
}

/* 64 bit move 16 bit immediate negated.  */
static void
movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
                      ^ 0xffffffffffffffffULL));
}

/* 32 bit move 16 bit immediate keep remaining shorts.  */
static void
movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);
  uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
  uint32_t value = val << (pos * 16);
  uint32_t mask = ~(0xffffU << (pos * 16));

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
}

/* 64 bit move 16 bit immediate keep remaining shorts.  */
static void
movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
{
  unsigned rd = INSTR (4, 0);
  uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
  uint64_t value = (uint64_t) val << (pos * 16);
  uint64_t mask = ~(0xffffULL << (pos * 16));

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
}

static void
dexMoveWideImmediate (sim_cpu *cpu)
{
  /* assert instr[28:23] = 100101
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
     instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
     instr[20,5] = uimm16
     instr[4,0] = Rd  */

  /* N.B. the (multiple of 16) shift is applied by the called routine,
     we just pass the multiplier.  */

  uint32_t imm;
  uint32_t size = INSTR (31, 31);
  uint32_t op = INSTR (30, 29);
  uint32_t shift = INSTR (22, 21);

  /* 32 bit can only shift 0 or 1 lot of 16.
     Anything else is an unallocated instruction.
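     For example (worked through, not from the original comments),
     MOVN W0, #0x1234, LSL #16 reaches movn32 with val == 0x1234 and
     pos == 1, and writes 0xedcbffff to W0.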
*/
  if (size == 0 && (shift > 1))
    HALT_UNALLOC;

  if (op == 1)
    HALT_UNALLOC;

  imm = INSTR (20, 5);

  if (size == 0)
    {
      if (op == 0)
        movn32 (cpu, imm, shift);
      else if (op == 2)
        movz32 (cpu, imm, shift);
      else
        movk32 (cpu, imm, shift);
    }
  else
    {
      if (op == 0)
        movn64 (cpu, imm, shift);
      else if (op == 2)
        movz64 (cpu, imm, shift);
      else
        movk64 (cpu, imm, shift);
    }
}

/* Bitfield operations.
   These take a pair of bit positions r and s which are in {0..31}
   or {0..63} depending on the instruction word size.
   N.B. register args may not be SP.  */

/* OK, we start with ubfm which just needs to pick
   some bits out of source, zero the rest and write
   the result to dest.  Just need two logical shifts.  */

/* 32 bit bitfield move, left and right of affected zeroed
   if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
static void
ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);

  /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
  if (r <= s)
    {
      /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to bit s - r
         i.e. by 31 + r - s.  */
      value <<= 31 - s;
      value >>= 31 + r - s;
    }
  else
    {
      /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
         We want only bits s:xxx:0 starting at bit 31-(r-1)
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to 31-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 31 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
}

/* 64 bit bitfield move, left and right of affected zeroed
   if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
static void
ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);

  if (r <= s)
    {
      /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word.
         So we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to bit s - r
         i.e. by 63 + r - s.  */
      value <<= 63 - s;
      value >>= 63 + r - s;
    }
  else
    {
      /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
         We want only bits s:xxx:0 starting at bit 63-(r-1).
         So we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to 63-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 63 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
}

/* The signed versions need to insert sign bits
   on the left of the inserted bit field.
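   (For instance, an illustration not in the original comments:
   SXTB Wd, Wn is SBFM with r == 0, s == 7, so the low byte is shifted
   up by 24 and then arithmetic-shifted back down by 24, dragging
   copies of bit 7 in from the left.)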
So we do
   much the same as the unsigned version except we
   use an arithmetic shift right -- this just means
   we need to operate on signed values.  */

/* 32 bit bitfield move, left of affected sign-extended, right zeroed.  */
/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
static void
sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  /* As per ubfm32 but use an ASR instead of an LSR; this relies on
     >> of a negative signed value behaving as an arithmetic shift.  */
  int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);

  if (r <= s)
    {
      value <<= 31 - s;
      value >>= 31 + r - s;
    }
  else
    {
      value <<= 31 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
}

/* 64 bit bitfield move, left of affected sign-extended, right zeroed.  */
/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
static void
sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  /* As per ubfm but use an ASR instead of an LSR.  */
  int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);

  if (r <= s)
    {
      value <<= 63 - s;
      value >>= 63 + r - s;
    }
  else
    {
      value <<= 63 - s;
      value >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
}

/* Finally, these versions leave non-affected bits
   as is, so we need to generate the bits as per
   ubfm and also generate a mask to pick the
   bits from the original and computed values.  */

/* 32 bit bitfield move, non-affected bits left as is.
   If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
static void
bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rn = INSTR (9, 5);
  uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint32_t mask = -1;
  unsigned rd;
  uint32_t value2;

  /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
  if (r <= s)
    {
      /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to bit s - r
         i.e. by 31 + r - s.  */
      value <<= 31 - s;
      value >>= 31 + r - s;
      /* The mask must include the same bits.  */
      mask <<= 31 - s;
      mask >>= 31 + r - s;
    }
  else
    {
      /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
         We want only bits s:xxx:0 starting at bit 31-(r-1)
         so we LSL bit s up to bit 31 i.e. by 31 - s
         and then we LSR to bring bit 31 down to 31-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 31 - s;
      value >>= r - (s + 1);
      /* The mask must include the same bits.
*/
      mask <<= 31 - s;
      mask >>= r - (s + 1);
    }

  rd = INSTR (4, 0);
  value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);

  value2 &= ~mask;
  value2 |= value;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
}

/* 64 bit bitfield move, non-affected bits left as is.
   If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
static void
bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
{
  unsigned rd;
  unsigned rn = INSTR (9, 5);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t mask = 0xffffffffffffffffULL;

  if (r <= s)
    {
      /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
         We want only bits s:xxx:r at the bottom of the word
         so we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to bit s - r
         i.e. by 63 + r - s.  */
      value <<= 63 - s;
      value >>= 63 + r - s;
      /* The mask must include the same bits.  */
      mask <<= 63 - s;
      mask >>= 63 + r - s;
    }
  else
    {
      /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
         We want only bits s:xxx:0 starting at bit 63-(r-1)
         so we LSL bit s up to bit 63 i.e. by 63 - s
         and then we LSR to bring bit 63 down to 63-(r-1)+s
         i.e. by r - (s + 1).  */
      value <<= 63 - s;
      value >>= r - (s + 1);
      /* The mask must include the same bits.  */
      mask <<= 63 - s;
      mask >>= r - (s + 1);
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  rd = INSTR (4, 0);
  aarch64_set_reg_u64
    (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
}

static void
dexBitfieldImmediate (sim_cpu *cpu)
{
  /* assert instr[28:23] = 100110
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
     instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
     instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
     instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
     instr[9,5] = Rn
     instr[4,0] = Rd  */

  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  uint32_t dispatch;
  uint32_t imms;
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have immr[5] = 0 and imms[5] = 0
     or else we have an UNALLOC.  */
  uint32_t immr = INSTR (21, 16);

  if (~size & N)
    HALT_UNALLOC;

  if (!size && uimm (immr, 5, 5))
    HALT_UNALLOC;

  imms = INSTR (15, 10);
  if (!size && uimm (imms, 5, 5))
    HALT_UNALLOC;

  /* Switch on combined size and op.
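     dispatch is size:op, so 0/1/2 select the 32-bit SBFM/BFM/UBFM
     handlers, 4/5/6 the 64-bit ones, and 3/7 (op == 3) fall through
     to HALT_UNALLOC.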
*/
  dispatch = INSTR (31, 29);
  switch (dispatch)
    {
    case 0: sbfm32 (cpu, immr, imms); return;
    case 1: bfm32 (cpu, immr, imms); return;
    case 2: ubfm32 (cpu, immr, imms); return;
    case 4: sbfm (cpu, immr, imms); return;
    case 5: bfm (cpu, immr, imms); return;
    case 6: ubfm (cpu, immr, imms); return;
    default: HALT_UNALLOC;
    }
}

static void
do_EXTR_32 (sim_cpu *cpu)
{
  /* instr[31:21] = 00010011100
     instr[20,16] = Rm
     instr[15,10] = imms : 0xxxxx for 32 bit
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */
  unsigned rm   = INSTR (20, 16);
  unsigned imms = INSTR (15, 10) & 31;
  unsigned rn   = INSTR ( 9,  5);
  unsigned rd   = INSTR ( 4,  0);
  uint64_t val1;
  uint64_t val2;

  val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
  val1 >>= imms;
  val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  val2 <<= (32 - imms);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Mask the result down to 32 bits: val2 still holds Rn's bits above
     the extracted field, and they must not leak into Rd's top half.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (val1 | val2) & 0xffffffffULL);
}

static void
do_EXTR_64 (sim_cpu *cpu)
{
  /* instr[31:21] = 10010011100
     instr[20,16] = Rm
     instr[15,10] = imms
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */
  unsigned rm   = INSTR (20, 16);
  unsigned imms = INSTR (15, 10) & 63;
  unsigned rn   = INSTR ( 9,  5);
  unsigned rd   = INSTR ( 4,  0);
  uint64_t val;

  val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  val >>= imms;
  /* Guard the shift: a shift by 64 would be undefined behaviour;
     for imms == 0 the result is simply Rm.  */
  if (imms > 0)
    val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));

  aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
}

static void
dexExtractImmediate (sim_cpu *cpu)
{
  /* assert instr[28:23] = 100111
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
     instr[22]    = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
     instr[21]    = op0 : must be 0 or UNALLOC
     instr[20,16] = Rm
     instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
     instr[9,5]   = Rn
     instr[4,0]   = Rd  */

  /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
  /* 64 bit operations must have N = 1 or else we have an UNALLOC.  */
  uint32_t dispatch;
  uint32_t size = INSTR (31, 31);
  uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have imms[5] = 0
     or else we have an UNALLOC.  */
  uint32_t imms = INSTR (15, 10);

  if (size ^ N)
    HALT_UNALLOC;

  if (!size && uimm (imms, 5, 5))
    HALT_UNALLOC;

  /* Switch on combined size and op.  */
  dispatch = INSTR (31, 29);

  if (dispatch == 0)
    do_EXTR_32 (cpu);

  else if (dispatch == 4)
    do_EXTR_64 (cpu);

  else if (dispatch == 1)
    HALT_NYI;
  else
    HALT_UNALLOC;
}

static void
dexDPImm (sim_cpu *cpu)
{
  /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
     bits [25,23] of a DPImm are the secondary dispatch vector.
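     The group2 values below are just those three bits: 000/001 PC-rel
     addressing, 010/011 add/subtract, 100 logical, 101 move wide,
     110 bitfield and 111 extract.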
*/
  uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));

  switch (group2)
    {
    case DPIMM_PCADR_000:
    case DPIMM_PCADR_001:
      dexPCRelAddressing (cpu);
      return;

    case DPIMM_ADDSUB_010:
    case DPIMM_ADDSUB_011:
      dexAddSubtractImmediate (cpu);
      return;

    case DPIMM_LOG_100:
      dexLogicalImmediate (cpu);
      return;

    case DPIMM_MOV_101:
      dexMoveWideImmediate (cpu);
      return;

    case DPIMM_BITF_110:
      dexBitfieldImmediate (cpu);
      return;

    case DPIMM_EXTR_111:
      dexExtractImmediate (cpu);
      return;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}

static void
dexLoadUnscaledImmediate (sim_cpu *cpu)
{
  /* instr[29,24] == 111_00
     instr[21] == 0
     instr[11,10] == 00
     instr[31,30] = size
     instr[26] = V
     instr[23,22] = opc
     instr[20,12] = simm9
     instr[9,5] = rn may be SP.  */
  /* unsigned rt = INSTR (4, 0);  */
  uint32_t V = INSTR (26, 26);
  uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
  int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);

  if (!V)
    {
      /* GReg operations.  */
      switch (dispatch)
        {
        case 0:  sturb (cpu, imm); return;
        case 1:  ldurb32 (cpu, imm); return;
        case 2:  ldursb64 (cpu, imm); return;
        case 3:  ldursb32 (cpu, imm); return;
        case 4:  sturh (cpu, imm); return;
        case 5:  ldurh32 (cpu, imm); return;
        case 6:  ldursh64 (cpu, imm); return;
        case 7:  ldursh32 (cpu, imm); return;
        case 8:  stur32 (cpu, imm); return;
        case 9:  ldur32 (cpu, imm); return;
        case 10: ldursw (cpu, imm); return;
        case 12: stur64 (cpu, imm); return;
        case 13: ldur64 (cpu, imm); return;

        case 14:
          /* PRFUM NYI.  */
          HALT_NYI;

        default:
        case 11:
        case 15:
          HALT_UNALLOC;
        }
    }

  /* FReg operations.  */
  switch (dispatch)
    {
    case 2:  fsturq (cpu, imm); return;
    case 3:  fldurq (cpu, imm); return;
    case 8:  fsturs (cpu, imm); return;
    case 9:  fldurs (cpu, imm); return;
    case 12: fsturd (cpu, imm); return;
    case 13: fldurd (cpu, imm); return;

    case 0: /* STUR 8 bit FP.  */
    case 1: /* LDUR 8 bit FP.  */
    case 4: /* STUR 16 bit FP.  */
    case 5: /* LDUR 16 bit FP.  */
      HALT_NYI;

    default:
    case 6:
    case 7:
    case 10:
    case 11:
    case 14:
    case 15:
      HALT_UNALLOC;
    }
}

/* N.B. A preliminary note regarding all the ldrs<x>32
   instructions

   The signed value loaded by these instructions is cast to unsigned
   before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
   64 bit element of the GReg union.  This performs a 32 bit sign extension
   (as required) but avoids 64 bit sign extension, thus ensuring that the
   top half of the register word is zero.  This is what the spec demands
   when a 32 bit load occurs.  */

/* 32 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned int rn = INSTR (9, 5);
  unsigned int rt = INSTR (4, 0);

  /* The target register may not be SP but the source may be;
     there is no scaling required for a byte load.
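     (Note: the (int64_t) cast below sign-extends through all 64 bits,
     which appears to diverge from the cast-to-unsigned convention
     described in the preliminary note above; a (uint32_t) cast would
     keep the top half of the register zero.)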
*/ 10592 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; 10593 aarch64_set_reg_u64 (cpu, rt, NO_SP, 10594 (int64_t) aarch64_get_mem_s8 (cpu, address)); 10595 } 10596 10597 /* 32 bit load sign-extended byte scaled or unscaled zero- 10598 or sign-extended 32-bit register offset. */ 10599 static void 10600 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10601 { 10602 unsigned int rm = INSTR (20, 16); 10603 unsigned int rn = INSTR (9, 5); 10604 unsigned int rt = INSTR (4, 0); 10605 10606 /* rn may reference SP, rm and rt must reference ZR. */ 10607 10608 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10609 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10610 extension); 10611 10612 /* There is no scaling required for a byte load. */ 10613 aarch64_set_reg_u64 10614 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address 10615 + displacement)); 10616 } 10617 10618 /* 32 bit load sign-extended byte unscaled signed 9 bit with 10619 pre- or post-writeback. */ 10620 static void 10621 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) 10622 { 10623 uint64_t address; 10624 unsigned int rn = INSTR (9, 5); 10625 unsigned int rt = INSTR (4, 0); 10626 10627 if (rn == rt && wb != NoWriteBack) 10628 HALT_UNALLOC; 10629 10630 address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10631 10632 if (wb == Pre) 10633 address += offset; 10634 10635 aarch64_set_reg_u64 (cpu, rt, NO_SP, 10636 (int64_t) aarch64_get_mem_s8 (cpu, address)); 10637 10638 if (wb == Post) 10639 address += offset; 10640 10641 if (wb != NoWriteBack) 10642 aarch64_set_reg_u64 (cpu, rn, NO_SP, address); 10643 } 10644 10645 /* 8 bit store scaled. */ 10646 static void 10647 fstrb_abs (sim_cpu *cpu, uint32_t offset) 10648 { 10649 unsigned st = INSTR (4, 0); 10650 unsigned rn = INSTR (9, 5); 10651 10652 aarch64_set_mem_u8 (cpu, 10653 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, 10654 aarch64_get_vec_u8 (cpu, st, 0)); 10655 } 10656 10657 /* 8 bit store scaled or unscaled zero- or 10658 sign-extended 8-bit register offset. */ 10659 static void 10660 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10661 { 10662 unsigned rm = INSTR (20, 16); 10663 unsigned rn = INSTR (9, 5); 10664 unsigned st = INSTR (4, 0); 10665 10666 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10667 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10668 extension); 10669 uint64_t displacement = scaling == Scaled ? extended : 0; 10670 10671 aarch64_set_mem_u8 10672 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0)); 10673 } 10674 10675 /* 16 bit store scaled. */ 10676 static void 10677 fstrh_abs (sim_cpu *cpu, uint32_t offset) 10678 { 10679 unsigned st = INSTR (4, 0); 10680 unsigned rn = INSTR (9, 5); 10681 10682 aarch64_set_mem_u16 10683 (cpu, 10684 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16), 10685 aarch64_get_vec_u16 (cpu, st, 0)); 10686 } 10687 10688 /* 16 bit store scaled or unscaled zero- 10689 or sign-extended 16-bit register offset. 
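   (OPT_SCALE below presumably multiplies the extended index by the
   transfer size, here 2 bytes, only when scaling == Scaled; with
   Unscaled the raw index is used.)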
*/ 10690 static void 10691 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10692 { 10693 unsigned rm = INSTR (20, 16); 10694 unsigned rn = INSTR (9, 5); 10695 unsigned st = INSTR (4, 0); 10696 10697 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10698 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10699 extension); 10700 uint64_t displacement = OPT_SCALE (extended, 16, scaling); 10701 10702 aarch64_set_mem_u16 10703 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0)); 10704 } 10705 10706 /* 32 bit store scaled unsigned 12 bit. */ 10707 static void 10708 fstrs_abs (sim_cpu *cpu, uint32_t offset) 10709 { 10710 unsigned st = INSTR (4, 0); 10711 unsigned rn = INSTR (9, 5); 10712 10713 aarch64_set_mem_u32 10714 (cpu, 10715 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32), 10716 aarch64_get_vec_u32 (cpu, st, 0)); 10717 } 10718 10719 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */ 10720 static void 10721 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) 10722 { 10723 unsigned rn = INSTR (9, 5); 10724 unsigned st = INSTR (4, 0); 10725 10726 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10727 10728 if (wb != Post) 10729 address += offset; 10730 10731 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0)); 10732 10733 if (wb == Post) 10734 address += offset; 10735 10736 if (wb != NoWriteBack) 10737 aarch64_set_reg_u64 (cpu, rn, SP_OK, address); 10738 } 10739 10740 /* 32 bit store scaled or unscaled zero- 10741 or sign-extended 32-bit register offset. */ 10742 static void 10743 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10744 { 10745 unsigned rm = INSTR (20, 16); 10746 unsigned rn = INSTR (9, 5); 10747 unsigned st = INSTR (4, 0); 10748 10749 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10750 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10751 extension); 10752 uint64_t displacement = OPT_SCALE (extended, 32, scaling); 10753 10754 aarch64_set_mem_u32 10755 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0)); 10756 } 10757 10758 /* 64 bit store scaled unsigned 12 bit. */ 10759 static void 10760 fstrd_abs (sim_cpu *cpu, uint32_t offset) 10761 { 10762 unsigned st = INSTR (4, 0); 10763 unsigned rn = INSTR (9, 5); 10764 10765 aarch64_set_mem_u64 10766 (cpu, 10767 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64), 10768 aarch64_get_vec_u64 (cpu, st, 0)); 10769 } 10770 10771 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */ 10772 static void 10773 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) 10774 { 10775 unsigned rn = INSTR (9, 5); 10776 unsigned st = INSTR (4, 0); 10777 10778 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10779 10780 if (wb != Post) 10781 address += offset; 10782 10783 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0)); 10784 10785 if (wb == Post) 10786 address += offset; 10787 10788 if (wb != NoWriteBack) 10789 aarch64_set_reg_u64 (cpu, rn, SP_OK, address); 10790 } 10791 10792 /* 64 bit store scaled or unscaled zero- 10793 or sign-extended 32-bit register offset. 
*/ 10794 static void 10795 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10796 { 10797 unsigned rm = INSTR (20, 16); 10798 unsigned rn = INSTR (9, 5); 10799 unsigned st = INSTR (4, 0); 10800 10801 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10802 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10803 extension); 10804 uint64_t displacement = OPT_SCALE (extended, 64, scaling); 10805 10806 aarch64_set_mem_u64 10807 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0)); 10808 } 10809 10810 /* 128 bit store scaled unsigned 12 bit. */ 10811 static void 10812 fstrq_abs (sim_cpu *cpu, uint32_t offset) 10813 { 10814 FRegister a; 10815 unsigned st = INSTR (4, 0); 10816 unsigned rn = INSTR (9, 5); 10817 uint64_t addr; 10818 10819 aarch64_get_FP_long_double (cpu, st, & a); 10820 10821 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128); 10822 aarch64_set_mem_long_double (cpu, addr, a); 10823 } 10824 10825 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */ 10826 static void 10827 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) 10828 { 10829 FRegister a; 10830 unsigned rn = INSTR (9, 5); 10831 unsigned st = INSTR (4, 0); 10832 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10833 10834 if (wb != Post) 10835 address += offset; 10836 10837 aarch64_get_FP_long_double (cpu, st, & a); 10838 aarch64_set_mem_long_double (cpu, address, a); 10839 10840 if (wb == Post) 10841 address += offset; 10842 10843 if (wb != NoWriteBack) 10844 aarch64_set_reg_u64 (cpu, rn, SP_OK, address); 10845 } 10846 10847 /* 128 bit store scaled or unscaled zero- 10848 or sign-extended 32-bit register offset. */ 10849 static void 10850 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10851 { 10852 unsigned rm = INSTR (20, 16); 10853 unsigned rn = INSTR (9, 5); 10854 unsigned st = INSTR (4, 0); 10855 10856 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10857 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10858 extension); 10859 uint64_t displacement = OPT_SCALE (extended, 128, scaling); 10860 10861 FRegister a; 10862 10863 aarch64_get_FP_long_double (cpu, st, & a); 10864 aarch64_set_mem_long_double (cpu, address + displacement, a); 10865 } 10866 10867 static void 10868 dexLoadImmediatePrePost (sim_cpu *cpu) 10869 { 10870 /* instr[31,30] = size 10871 instr[29,27] = 111 10872 instr[26] = V 10873 instr[25,24] = 00 10874 instr[23,22] = opc 10875 instr[21] = 0 10876 instr[20,12] = simm9 10877 instr[11] = wb : 0 ==> Post, 1 ==> Pre 10878 instr[10] = 0 10879 instr[9,5] = Rn may be SP. 10880 instr[4,0] = Rt */ 10881 10882 uint32_t V = INSTR (26, 26); 10883 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); 10884 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); 10885 WriteBack wb = INSTR (11, 11); 10886 10887 if (!V) 10888 { 10889 /* GReg operations. 
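 The dispatch value is size:opc;
e.g. LDR w1, [x2], #4 has size = 10 and opc = 01, so dispatch is 9
and ldr32_wb below handles it with wb == Post.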
*/
10890 switch (dispatch)
10891 {
10892 case 0: strb_wb (cpu, imm, wb); return;
10893 case 1: ldrb32_wb (cpu, imm, wb); return;
10894 case 2: ldrsb_wb (cpu, imm, wb); return;
10895 case 3: ldrsb32_wb (cpu, imm, wb); return;
10896 case 4: strh_wb (cpu, imm, wb); return;
10897 case 5: ldrh32_wb (cpu, imm, wb); return;
10898 case 6: ldrsh64_wb (cpu, imm, wb); return;
10899 case 7: ldrsh32_wb (cpu, imm, wb); return;
10900 case 8: str32_wb (cpu, imm, wb); return;
10901 case 9: ldr32_wb (cpu, imm, wb); return;
10902 case 10: ldrsw_wb (cpu, imm, wb); return;
10903 case 12: str_wb (cpu, imm, wb); return;
10904 case 13: ldr_wb (cpu, imm, wb); return;
10905
10906 default:
10907 case 11:
10908 case 14:
10909 case 15:
10910 HALT_UNALLOC;
10911 }
10912 }
10913
10914 /* FReg operations. */
10915 switch (dispatch)
10916 {
10917 case 2: fstrq_wb (cpu, imm, wb); return;
10918 case 3: fldrq_wb (cpu, imm, wb); return;
10919 case 8: fstrs_wb (cpu, imm, wb); return;
10920 case 9: fldrs_wb (cpu, imm, wb); return;
10921 case 12: fstrd_wb (cpu, imm, wb); return;
10922 case 13: fldrd_wb (cpu, imm, wb); return;
10923
10924 case 0: /* STUR 8 bit FP. */
10925 case 1: /* LDUR 8 bit FP. */
10926 case 4: /* STUR 16 bit FP. */
10927 case 5: /* LDUR 16 bit FP. */
10928 HALT_NYI;
10929
10930 default:
10931 case 6:
10932 case 7:
10933 case 10:
10934 case 11:
10935 case 14:
10936 case 15:
10937 HALT_UNALLOC;
10938 }
10939 }
10940
10941 static void
10942 dexLoadRegisterOffset (sim_cpu *cpu)
10943 {
10944 /* instr[31,30] = size
10945 instr[29,27] = 111
10946 instr[26] = V
10947 instr[25,24] = 00
10948 instr[23,22] = opc
10949 instr[21] = 1
10950 instr[20,16] = rm
10951 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10952 110 ==> SXTW, 111 ==> SXTX,
10953 ow ==> RESERVED
10954 instr[12] = scaled
10955 instr[11,10] = 10
10956 instr[9,5] = rn
10957 instr[4,0] = rt. */
10958
10959 uint32_t V = INSTR (26, 26);
10960 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10961 Scaling scale = INSTR (12, 12);
10962 Extension extensionType = INSTR (15, 13);
10963
10964 /* Check for illegal extension types. */
10965 if (uimm (extensionType, 1, 1) == 0)
10966 HALT_UNALLOC;
10967
10968 if (extensionType == UXTX || extensionType == SXTX)
10969 extensionType = NoExtension;
10970
10971 if (!V)
10972 {
10973 /* GReg operations. */
10974 switch (dispatch)
10975 {
10976 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10977 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10978 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10979 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10980 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10981 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10982 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10983 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10984 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10985 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10986 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10987 case 12: str_scale_ext (cpu, scale, extensionType); return;
10988 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10989 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10990
10991 default:
10992 case 11:
10993 case 15:
10994 HALT_UNALLOC;
10995 }
10996 }
10997
10998 /* FReg operations. */
10999 switch (dispatch)
11000 {
11001 case 1: /* LDUR 8 bit FP.
*/
11002 HALT_NYI;
11003 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
11004 case 5: /* LDUR 16 bit FP. */
11005 HALT_NYI;
11006 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
11007 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
11008
11009 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11010 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11011 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11012 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11013 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11014
11015 default:
11016 case 6:
11017 case 7:
11018 case 10:
11019 case 11:
11020 case 14:
11021 case 15:
11022 HALT_UNALLOC;
11023 }
11024 }
11025
11026 static void
11027 dexLoadUnsignedImmediate (sim_cpu *cpu)
11028 {
11029 /* instr[29,24] == 111_01
11030 instr[31,30] = size
11031 instr[26] = V
11032 instr[23,22] = opc
11033 instr[21,10] = uimm12 : unsigned immediate offset
11034 instr[9,5] = rn may be SP.
11035 instr[4,0] = rt. */
11036
11037 uint32_t V = INSTR (26, 26);
11038 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11039 uint32_t imm = INSTR (21, 10);
11040
11041 if (!V)
11042 {
11043 /* GReg operations. */
11044 switch (dispatch)
11045 {
11046 case 0: strb_abs (cpu, imm); return;
11047 case 1: ldrb32_abs (cpu, imm); return;
11048 case 2: ldrsb_abs (cpu, imm); return;
11049 case 3: ldrsb32_abs (cpu, imm); return;
11050 case 4: strh_abs (cpu, imm); return;
11051 case 5: ldrh32_abs (cpu, imm); return;
11052 case 6: ldrsh_abs (cpu, imm); return;
11053 case 7: ldrsh32_abs (cpu, imm); return;
11054 case 8: str32_abs (cpu, imm); return;
11055 case 9: ldr32_abs (cpu, imm); return;
11056 case 10: ldrsw_abs (cpu, imm); return;
11057 case 12: str_abs (cpu, imm); return;
11058 case 13: ldr_abs (cpu, imm); return;
11059 case 14: prfm_abs (cpu, imm); return;
11060
11061 default:
11062 case 11:
11063 case 15:
11064 HALT_UNALLOC;
11065 }
11066 }
11067
11068 /* FReg operations. */
11069 switch (dispatch)
11070 {
11071 case 0: fstrb_abs (cpu, imm); return;
11072 case 4: fstrh_abs (cpu, imm); return;
11073 case 8: fstrs_abs (cpu, imm); return;
11074 case 12: fstrd_abs (cpu, imm); return;
11075 case 2: fstrq_abs (cpu, imm); return;
11076
11077 case 1: fldrb_abs (cpu, imm); return;
11078 case 5: fldrh_abs (cpu, imm); return;
11079 case 9: fldrs_abs (cpu, imm); return;
11080 case 13: fldrd_abs (cpu, imm); return;
11081 case 3: fldrq_abs (cpu, imm); return;
11082
11083 default:
11084 case 6:
11085 case 7:
11086 case 10:
11087 case 11:
11088 case 14:
11089 case 15:
11090 HALT_UNALLOC;
11091 }
11092 }
11093
11094 static void
11095 dexLoadExclusive (sim_cpu *cpu)
11096 {
11097 /* assert instr[29,24] = 001000;
11098 instr[31,30] = size
11099 instr[23] = 0 if exclusive
11100 instr[22] = L : 1 if load, 0 if store
11101 instr[21] = 1 if pair
11102 instr[20,16] = Rs
11103 instr[15] = o0 : 1 if ordered
11104 instr[14,10] = Rt2
11105 instr[9,5] = Rn
11106 instr[4,0] = Rt. */
11107
11108 switch (INSTR (22, 21))
11109 {
11110 case 2: ldxr (cpu); return;
11111 case 0: stxr (cpu); return;
11112 default: HALT_NYI;
11113 }
11114 }
11115
11116 static void
11117 dexLoadOther (sim_cpu *cpu)
11118 {
11119 uint32_t dispatch;
11120
11121 /* instr[29,25] = 111_0
11122 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11123 instr[21] : instr[11,10] forms the secondary dispatch vector.
*/ 11124 if (INSTR (24, 24)) 11125 { 11126 dexLoadUnsignedImmediate (cpu); 11127 return; 11128 } 11129 11130 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10)); 11131 switch (dispatch) 11132 { 11133 case 0: dexLoadUnscaledImmediate (cpu); return; 11134 case 1: dexLoadImmediatePrePost (cpu); return; 11135 case 3: dexLoadImmediatePrePost (cpu); return; 11136 case 6: dexLoadRegisterOffset (cpu); return; 11137 11138 default: 11139 case 2: 11140 case 4: 11141 case 5: 11142 case 7: 11143 HALT_NYI; 11144 } 11145 } 11146 11147 static void 11148 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) 11149 { 11150 unsigned rn = INSTR (14, 10); 11151 unsigned rd = INSTR (9, 5); 11152 unsigned rm = INSTR (4, 0); 11153 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11154 11155 if ((rn == rd || rm == rd) && wb != NoWriteBack) 11156 HALT_UNALLOC; /* ??? */ 11157 11158 offset <<= 2; 11159 11160 if (wb != Post) 11161 address += offset; 11162 11163 aarch64_set_mem_u32 (cpu, address, 11164 aarch64_get_reg_u32 (cpu, rm, NO_SP)); 11165 aarch64_set_mem_u32 (cpu, address + 4, 11166 aarch64_get_reg_u32 (cpu, rn, NO_SP)); 11167 11168 if (wb == Post) 11169 address += offset; 11170 11171 if (wb != NoWriteBack) 11172 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11173 } 11174 11175 static void 11176 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) 11177 { 11178 unsigned rn = INSTR (14, 10); 11179 unsigned rd = INSTR (9, 5); 11180 unsigned rm = INSTR (4, 0); 11181 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11182 11183 if ((rn == rd || rm == rd) && wb != NoWriteBack) 11184 HALT_UNALLOC; /* ??? */ 11185 11186 offset <<= 3; 11187 11188 if (wb != Post) 11189 address += offset; 11190 11191 aarch64_set_mem_u64 (cpu, address, 11192 aarch64_get_reg_u64 (cpu, rm, NO_SP)); 11193 aarch64_set_mem_u64 (cpu, address + 8, 11194 aarch64_get_reg_u64 (cpu, rn, NO_SP)); 11195 11196 if (wb == Post) 11197 address += offset; 11198 11199 if (wb != NoWriteBack) 11200 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11201 } 11202 11203 static void 11204 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) 11205 { 11206 unsigned rn = INSTR (14, 10); 11207 unsigned rd = INSTR (9, 5); 11208 unsigned rm = INSTR (4, 0); 11209 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11210 11211 /* Treat this as unalloc to make sure we don't do it. */ 11212 if (rn == rm) 11213 HALT_UNALLOC; 11214 11215 offset <<= 2; 11216 11217 if (wb != Post) 11218 address += offset; 11219 11220 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address)); 11221 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4)); 11222 11223 if (wb == Post) 11224 address += offset; 11225 11226 if (wb != NoWriteBack) 11227 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11228 } 11229 11230 static void 11231 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb) 11232 { 11233 unsigned rn = INSTR (14, 10); 11234 unsigned rd = INSTR (9, 5); 11235 unsigned rm = INSTR (4, 0); 11236 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11237 11238 /* Treat this as unalloc to make sure we don't do it. 
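 (An LDP with Rt == Rt2 is
CONSTRAINED UNPREDICTABLE in the architecture, so halting here is a
safe, if strict, choice.)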
*/ 11239 if (rn == rm) 11240 HALT_UNALLOC; 11241 11242 offset <<= 2; 11243 11244 if (wb != Post) 11245 address += offset; 11246 11247 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address)); 11248 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4)); 11249 11250 if (wb == Post) 11251 address += offset; 11252 11253 if (wb != NoWriteBack) 11254 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11255 } 11256 11257 static void 11258 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) 11259 { 11260 unsigned rn = INSTR (14, 10); 11261 unsigned rd = INSTR (9, 5); 11262 unsigned rm = INSTR (4, 0); 11263 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11264 11265 /* Treat this as unalloc to make sure we don't do it. */ 11266 if (rn == rm) 11267 HALT_UNALLOC; 11268 11269 offset <<= 3; 11270 11271 if (wb != Post) 11272 address += offset; 11273 11274 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address)); 11275 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8)); 11276 11277 if (wb == Post) 11278 address += offset; 11279 11280 if (wb != NoWriteBack) 11281 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11282 } 11283 11284 static void 11285 dex_load_store_pair_gr (sim_cpu *cpu) 11286 { 11287 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit) 11288 instr[29,25] = instruction encoding: 101_0 11289 instr[26] = V : 1 if fp 0 if gp 11290 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre) 11291 instr[22] = load/store (1=> load) 11292 instr[21,15] = signed, scaled, offset 11293 instr[14,10] = Rn 11294 instr[ 9, 5] = Rd 11295 instr[ 4, 0] = Rm. */ 11296 11297 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22)); 11298 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15); 11299 11300 switch (dispatch) 11301 { 11302 case 2: store_pair_u32 (cpu, offset, Post); return; 11303 case 3: load_pair_u32 (cpu, offset, Post); return; 11304 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return; 11305 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return; 11306 case 6: store_pair_u32 (cpu, offset, Pre); return; 11307 case 7: load_pair_u32 (cpu, offset, Pre); return; 11308 11309 case 11: load_pair_s32 (cpu, offset, Post); return; 11310 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return; 11311 case 15: load_pair_s32 (cpu, offset, Pre); return; 11312 11313 case 18: store_pair_u64 (cpu, offset, Post); return; 11314 case 19: load_pair_u64 (cpu, offset, Post); return; 11315 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return; 11316 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return; 11317 case 22: store_pair_u64 (cpu, offset, Pre); return; 11318 case 23: load_pair_u64 (cpu, offset, Pre); return; 11319 11320 default: 11321 HALT_UNALLOC; 11322 } 11323 } 11324 11325 static void 11326 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) 11327 { 11328 unsigned rn = INSTR (14, 10); 11329 unsigned rd = INSTR (9, 5); 11330 unsigned rm = INSTR (4, 0); 11331 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11332 11333 offset <<= 2; 11334 11335 if (wb != Post) 11336 address += offset; 11337 11338 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0)); 11339 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0)); 11340 11341 if (wb == Post) 11342 address += offset; 11343 11344 if (wb != NoWriteBack) 11345 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11346 } 11347 11348 static void 11349 store_pair_double (sim_cpu *cpu, 
int32_t offset, WriteBack wb) 11350 { 11351 unsigned rn = INSTR (14, 10); 11352 unsigned rd = INSTR (9, 5); 11353 unsigned rm = INSTR (4, 0); 11354 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11355 11356 offset <<= 3; 11357 11358 if (wb != Post) 11359 address += offset; 11360 11361 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0)); 11362 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0)); 11363 11364 if (wb == Post) 11365 address += offset; 11366 11367 if (wb != NoWriteBack) 11368 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11369 } 11370 11371 static void 11372 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) 11373 { 11374 FRegister a; 11375 unsigned rn = INSTR (14, 10); 11376 unsigned rd = INSTR (9, 5); 11377 unsigned rm = INSTR (4, 0); 11378 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11379 11380 offset <<= 4; 11381 11382 if (wb != Post) 11383 address += offset; 11384 11385 aarch64_get_FP_long_double (cpu, rm, & a); 11386 aarch64_set_mem_long_double (cpu, address, a); 11387 aarch64_get_FP_long_double (cpu, rn, & a); 11388 aarch64_set_mem_long_double (cpu, address + 16, a); 11389 11390 if (wb == Post) 11391 address += offset; 11392 11393 if (wb != NoWriteBack) 11394 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11395 } 11396 11397 static void 11398 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) 11399 { 11400 unsigned rn = INSTR (14, 10); 11401 unsigned rd = INSTR (9, 5); 11402 unsigned rm = INSTR (4, 0); 11403 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11404 11405 if (rm == rn) 11406 HALT_UNALLOC; 11407 11408 offset <<= 2; 11409 11410 if (wb != Post) 11411 address += offset; 11412 11413 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address)); 11414 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4)); 11415 11416 if (wb == Post) 11417 address += offset; 11418 11419 if (wb != NoWriteBack) 11420 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11421 } 11422 11423 static void 11424 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) 11425 { 11426 unsigned rn = INSTR (14, 10); 11427 unsigned rd = INSTR (9, 5); 11428 unsigned rm = INSTR (4, 0); 11429 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11430 11431 if (rm == rn) 11432 HALT_UNALLOC; 11433 11434 offset <<= 3; 11435 11436 if (wb != Post) 11437 address += offset; 11438 11439 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address)); 11440 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8)); 11441 11442 if (wb == Post) 11443 address += offset; 11444 11445 if (wb != NoWriteBack) 11446 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11447 } 11448 11449 static void 11450 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) 11451 { 11452 FRegister a; 11453 unsigned rn = INSTR (14, 10); 11454 unsigned rd = INSTR (9, 5); 11455 unsigned rm = INSTR (4, 0); 11456 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11457 11458 if (rm == rn) 11459 HALT_UNALLOC; 11460 11461 offset <<= 4; 11462 11463 if (wb != Post) 11464 address += offset; 11465 11466 aarch64_get_mem_long_double (cpu, address, & a); 11467 aarch64_set_FP_long_double (cpu, rm, a); 11468 aarch64_get_mem_long_double (cpu, address + 16, & a); 11469 aarch64_set_FP_long_double (cpu, rn, a); 11470 11471 if (wb == Post) 11472 address += offset; 11473 11474 if (wb != NoWriteBack) 11475 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11476 } 11477 11478 static void 11479 
dex_load_store_pair_fp (sim_cpu *cpu)
11480 {
11481 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11482 instr[29,25] = instruction encoding
11483 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11484 instr[22] = load/store (1=> load)
11485 instr[21,15] = signed, scaled, offset
11486 instr[14,10] = Rn
11487 instr[ 9, 5] = Rd
11488 instr[ 4, 0] = Rm */
11489
11490 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11491 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11492
11493 switch (dispatch)
11494 {
11495 case 2: store_pair_float (cpu, offset, Post); return;
11496 case 3: load_pair_float (cpu, offset, Post); return;
11497 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11498 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11499 case 6: store_pair_float (cpu, offset, Pre); return;
11500 case 7: load_pair_float (cpu, offset, Pre); return;
11501
11502 case 10: store_pair_double (cpu, offset, Post); return;
11503 case 11: load_pair_double (cpu, offset, Post); return;
11504 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11505 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11506 case 14: store_pair_double (cpu, offset, Pre); return;
11507 case 15: load_pair_double (cpu, offset, Pre); return;
11508
11509 case 18: store_pair_long_double (cpu, offset, Post); return;
11510 case 19: load_pair_long_double (cpu, offset, Post); return;
11511 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11512 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11513 case 22: store_pair_long_double (cpu, offset, Pre); return;
11514 case 23: load_pair_long_double (cpu, offset, Pre); return;
11515
11516 default:
11517 HALT_UNALLOC;
11518 }
11519 }
11520
11521 static inline unsigned
11522 vec_reg (unsigned v, unsigned o)
11523 {
11524 return (v + o) & 0x1F; /* Vector register numbers wrap modulo 32. */
11525 }
11526
11527 /* Load multiple N-element structures to N consecutive registers. */
11528 static void
11529 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11530 {
11531 int all = INSTR (30, 30);
11532 unsigned size = INSTR (11, 10);
11533 unsigned vd = INSTR (4, 0);
11534 unsigned i;
11535
11536 switch (size)
11537 {
11538 case 0: /* 8-bit operations. */
11539 if (all)
11540 for (i = 0; i < (16 * N); i++)
11541 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11542 aarch64_get_mem_u8 (cpu, address + i));
11543 else
11544 for (i = 0; i < (8 * N); i++)
11545 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11546 aarch64_get_mem_u8 (cpu, address + i));
11547 return;
11548
11549 case 1: /* 16-bit operations. */
11550 if (all)
11551 for (i = 0; i < (8 * N); i++)
11552 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11553 aarch64_get_mem_u16 (cpu, address + i * 2));
11554 else
11555 for (i = 0; i < (4 * N); i++)
11556 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11557 aarch64_get_mem_u16 (cpu, address + i * 2));
11558 return;
11559
11560 case 2: /* 32-bit operations. */
11561 if (all)
11562 for (i = 0; i < (4 * N); i++)
11563 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11564 aarch64_get_mem_u32 (cpu, address + i * 4));
11565 else
11566 for (i = 0; i < (2 * N); i++)
11567 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11568 aarch64_get_mem_u32 (cpu, address + i * 4));
11569 return;
11570
11571 case 3: /* 64-bit operations.
*/
11572 if (all)
11573 for (i = 0; i < (2 * N); i++)
11574 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11575 aarch64_get_mem_u64 (cpu, address + i * 8));
11576 else
11577 for (i = 0; i < N; i++)
11578 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11579 aarch64_get_mem_u64 (cpu, address + i * 8));
11580 return;
11581 }
11582 }
11583
11584 /* LD4: load multiple 4-element structures to four consecutive registers. */
11585 static void
11586 LD4 (sim_cpu *cpu, uint64_t address)
11587 {
11588 vec_load (cpu, address, 4);
11589 }
11590
11591 /* LD3: load multiple 3-element structures to three consecutive registers. */
11592 static void
11593 LD3 (sim_cpu *cpu, uint64_t address)
11594 {
11595 vec_load (cpu, address, 3);
11596 }
11597
11598 /* LD2: load multiple 2-element structures to two consecutive registers. */
11599 static void
11600 LD2 (sim_cpu *cpu, uint64_t address)
11601 {
11602 vec_load (cpu, address, 2);
11603 }
11604
11605 /* Load multiple 1-element structures into one register. */
11606 static void
11607 LD1_1 (sim_cpu *cpu, uint64_t address)
11608 {
11609 int all = INSTR (30, 30);
11610 unsigned size = INSTR (11, 10);
11611 unsigned vd = INSTR (4, 0);
11612 unsigned i;
11613
11614 switch (size)
11615 {
11616 case 0:
11617 /* LD1 {Vd.16b}, addr, #16 */
11618 /* LD1 {Vd.8b}, addr, #8 */
11619 for (i = 0; i < (all ? 16 : 8); i++)
11620 aarch64_set_vec_u8 (cpu, vd, i,
11621 aarch64_get_mem_u8 (cpu, address + i));
11622 return;
11623
11624 case 1:
11625 /* LD1 {Vd.8h}, addr, #16 */
11626 /* LD1 {Vd.4h}, addr, #8 */
11627 for (i = 0; i < (all ? 8 : 4); i++)
11628 aarch64_set_vec_u16 (cpu, vd, i,
11629 aarch64_get_mem_u16 (cpu, address + i * 2));
11630 return;
11631
11632 case 2:
11633 /* LD1 {Vd.4s}, addr, #16 */
11634 /* LD1 {Vd.2s}, addr, #8 */
11635 for (i = 0; i < (all ? 4 : 2); i++)
11636 aarch64_set_vec_u32 (cpu, vd, i,
11637 aarch64_get_mem_u32 (cpu, address + i * 4));
11638 return;
11639
11640 case 3:
11641 /* LD1 {Vd.2d}, addr, #16 */
11642 /* LD1 {Vd.1d}, addr, #8 */
11643 for (i = 0; i < (all ? 2 : 1); i++)
11644 aarch64_set_vec_u64 (cpu, vd, i,
11645 aarch64_get_mem_u64 (cpu, address + i * 8));
11646 return;
11647 }
11648 }
11649
11650 /* Load multiple 1-element structures into two registers. */
11651 static void
11652 LD1_2 (sim_cpu *cpu, uint64_t address)
11653 {
11654 /* FIXME: This algorithm is *exactly* the same as the LD2 version.
11655 So why have two different instructions ? There must be something
11656 wrong somewhere. */
11657 vec_load (cpu, address, 2);
11658 }
11659
11660 /* Load multiple 1-element structures into three registers. */
11661 static void
11662 LD1_3 (sim_cpu *cpu, uint64_t address)
11663 {
11664 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11665 So why have two different instructions ? There must be something
11666 wrong somewhere. */
11667 vec_load (cpu, address, 3);
11668 }
11669
11670 /* Load multiple 1-element structures into four registers. */
11671 static void
11672 LD1_4 (sim_cpu *cpu, uint64_t address)
11673 {
11674 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11675 So why have two different instructions ? There must be something
11676 wrong somewhere. */
11677 vec_load (cpu, address, 4);
11678 }
11679
11680 /* The FIXME comments above are well founded.
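*/

/* Architecturally the LD2/LD3/LD4 forms de-interleave: element i of
   the memory stream belongs in lane i / N of register vd + (i % N),
   whereas the multi-register LD1 forms fill each register in turn,
   which is the behaviour vec_load implements for both.  A sketch
   (hypothetical helper, not called anywhere) of the de-interleaving
   index calculation:  */

static inline void
example_ldn_deinterleave_index (unsigned i, unsigned N,
                                unsigned *reg_offset, unsigned *lane)
{
  *reg_offset = i % N;  /* Which of the N registers.  */
  *lane = i / N;        /* Which lane within that register.  */
}

/* Store multiple N-element structures to N consecutive registers.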
*/
11681 static void
11682 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11683 {
11684 int all = INSTR (30, 30);
11685 unsigned size = INSTR (11, 10);
11686 unsigned vd = INSTR (4, 0);
11687 unsigned i;
11688
11689 switch (size)
11690 {
11691 case 0: /* 8-bit operations. */
11692 if (all)
11693 for (i = 0; i < (16 * N); i++)
11694 aarch64_set_mem_u8
11695 (cpu, address + i,
11696 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11697 else
11698 for (i = 0; i < (8 * N); i++)
11699 aarch64_set_mem_u8
11700 (cpu, address + i,
11701 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11702 return;
11703
11704 case 1: /* 16-bit operations. */
11705 if (all)
11706 for (i = 0; i < (8 * N); i++)
11707 aarch64_set_mem_u16
11708 (cpu, address + i * 2,
11709 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11710 else
11711 for (i = 0; i < (4 * N); i++)
11712 aarch64_set_mem_u16
11713 (cpu, address + i * 2,
11714 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11715 return;
11716
11717 case 2: /* 32-bit operations. */
11718 if (all)
11719 for (i = 0; i < (4 * N); i++)
11720 aarch64_set_mem_u32
11721 (cpu, address + i * 4,
11722 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11723 else
11724 for (i = 0; i < (2 * N); i++)
11725 aarch64_set_mem_u32
11726 (cpu, address + i * 4,
11727 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11728 return;
11729
11730 case 3: /* 64-bit operations. */
11731 if (all)
11732 for (i = 0; i < (2 * N); i++)
11733 aarch64_set_mem_u64
11734 (cpu, address + i * 8,
11735 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11736 else
11737 for (i = 0; i < N; i++)
11738 aarch64_set_mem_u64
11739 (cpu, address + i * 8,
11740 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11741 return;
11742 }
11743 }
11744
11745 /* Store multiple 4-element structures to four consecutive registers. */
11746 static void
11747 ST4 (sim_cpu *cpu, uint64_t address)
11748 {
11749 vec_store (cpu, address, 4);
11750 }
11751
11752 /* Store multiple 3-element structures to three consecutive registers. */
11753 static void
11754 ST3 (sim_cpu *cpu, uint64_t address)
11755 {
11756 vec_store (cpu, address, 3);
11757 }
11758
11759 /* Store multiple 2-element structures to two consecutive registers. */
11760 static void
11761 ST2 (sim_cpu *cpu, uint64_t address)
11762 {
11763 vec_store (cpu, address, 2);
11764 }
11765
11766 /* Store multiple 1-element structures into one register. */
11767 static void
11768 ST1_1 (sim_cpu *cpu, uint64_t address)
11769 {
11770 int all = INSTR (30, 30);
11771 unsigned size = INSTR (11, 10);
11772 unsigned vd = INSTR (4, 0);
11773 unsigned i;
11774
11775 switch (size)
11776 {
11777 case 0:
11778 for (i = 0; i < (all ? 16 : 8); i++)
11779 aarch64_set_mem_u8 (cpu, address + i,
11780 aarch64_get_vec_u8 (cpu, vd, i));
11781 return;
11782
11783 case 1:
11784 for (i = 0; i < (all ? 8 : 4); i++)
11785 aarch64_set_mem_u16 (cpu, address + i * 2,
11786 aarch64_get_vec_u16 (cpu, vd, i));
11787 return;
11788
11789 case 2:
11790 for (i = 0; i < (all ? 4 : 2); i++)
11791 aarch64_set_mem_u32 (cpu, address + i * 4,
11792 aarch64_get_vec_u32 (cpu, vd, i));
11793 return;
11794
11795 case 3:
11796 for (i = 0; i < (all ? 2 : 1); i++)
11797 aarch64_set_mem_u64 (cpu, address + i * 8,
11798 aarch64_get_vec_u64 (cpu, vd, i));
11799 return;
11800 }
11801 }
11802
11803 /* Store multiple 1-element structures into two registers.
*/ 11804 static void 11805 ST1_2 (sim_cpu *cpu, uint64_t address) 11806 { 11807 /* FIXME: This algorithm is *exactly* the same as the ST2 version. 11808 So why have two different instructions ? There must be 11809 something wrong somewhere. */ 11810 vec_store (cpu, address, 2); 11811 } 11812 11813 /* Store multiple 1-element structures into three registers. */ 11814 static void 11815 ST1_3 (sim_cpu *cpu, uint64_t address) 11816 { 11817 /* FIXME: This algorithm is *exactly* the same as the ST3 version. 11818 So why have two different instructions ? There must be 11819 something wrong somewhere. */ 11820 vec_store (cpu, address, 3); 11821 } 11822 11823 /* Store multiple 1-element structures into four registers. */ 11824 static void 11825 ST1_4 (sim_cpu *cpu, uint64_t address) 11826 { 11827 /* FIXME: This algorithm is *exactly* the same as the ST4 version. 11828 So why have two different instructions ? There must be 11829 something wrong somewhere. */ 11830 vec_store (cpu, address, 4); 11831 } 11832 11833 #define LDn_STn_SINGLE_LANE_AND_SIZE() \ 11834 do \ 11835 { \ 11836 switch (INSTR (15, 14)) \ 11837 { \ 11838 case 0: \ 11839 lane = (full << 3) | (s << 2) | size; \ 11840 size = 0; \ 11841 break; \ 11842 \ 11843 case 1: \ 11844 if ((size & 1) == 1) \ 11845 HALT_UNALLOC; \ 11846 lane = (full << 2) | (s << 1) | (size >> 1); \ 11847 size = 1; \ 11848 break; \ 11849 \ 11850 case 2: \ 11851 if ((size & 2) == 2) \ 11852 HALT_UNALLOC; \ 11853 \ 11854 if ((size & 1) == 0) \ 11855 { \ 11856 lane = (full << 1) | s; \ 11857 size = 2; \ 11858 } \ 11859 else \ 11860 { \ 11861 if (s) \ 11862 HALT_UNALLOC; \ 11863 lane = full; \ 11864 size = 3; \ 11865 } \ 11866 break; \ 11867 \ 11868 default: \ 11869 HALT_UNALLOC; \ 11870 } \ 11871 } \ 11872 while (0) 11873 11874 /* Load single structure into one lane of N registers. */ 11875 static void 11876 do_vec_LDn_single (sim_cpu *cpu, uint64_t address) 11877 { 11878 /* instr[31] = 0 11879 instr[30] = element selector 0=>half, 1=>all elements 11880 instr[29,24] = 00 1101 11881 instr[23] = 0=>simple, 1=>post 11882 instr[22] = 1 11883 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1) 11884 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), 11885 11111 (immediate post inc) 11886 instr[15,13] = opcode 11887 instr[12] = S, used for lane number 11888 instr[11,10] = size, also used for lane number 11889 instr[9,5] = address 11890 instr[4,0] = Vd */ 11891 11892 unsigned full = INSTR (30, 30); 11893 unsigned vd = INSTR (4, 0); 11894 unsigned size = INSTR (11, 10); 11895 unsigned s = INSTR (12, 12); 11896 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; 11897 int lane = 0; 11898 int i; 11899 11900 NYI_assert (29, 24, 0x0D); 11901 NYI_assert (22, 22, 1); 11902 11903 /* Compute the lane number first (using size), and then compute size. 
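 E.g. for
LD1 {v3.b}[10], [x0] the opcode field instr[15,14] is 00, full = 1,
s = 0 and size = 10, so the macro yields
lane = (full << 3) | (s << 2) | size = 10 and resets size to 0
(byte elements).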
*/
11904 LDn_STn_SINGLE_LANE_AND_SIZE ();
11905
11906 for (i = 0; i < nregs; i++)
11907 switch (size)
11908 {
11909 case 0:
11910 {
11911 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11912 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11913 break;
11914 }
11915
11916 case 1:
11917 {
11918 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11919 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11920 break;
11921 }
11922
11923 case 2:
11924 {
11925 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11926 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11927 break;
11928 }
11929
11930 case 3:
11931 {
11932 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11933 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11934 break;
11935 }
11936 }
11937 }
11938
11939 /* Store single structure from one lane of N registers. */
11940 static void
11941 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11942 {
11943 /* instr[31] = 0
11944 instr[30] = element selector 0=>half, 1=>all elements
11945 instr[29,24] = 00 1101
11946 instr[23] = 0=>simple, 1=>post
11947 instr[22] = 0
11948 instr[21] = width: ST1-or-ST3 (0) / ST2-or-ST4 (1)
11949 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11950 11111 (immediate post inc)
11951 instr[15,13] = opcode
11952 instr[12] = S, used for lane number
11953 instr[11,10] = size, also used for lane number
11954 instr[9,5] = address
11955 instr[4,0] = Vd */
11956
11957 unsigned full = INSTR (30, 30);
11958 unsigned vd = INSTR (4, 0);
11959 unsigned size = INSTR (11, 10);
11960 unsigned s = INSTR (12, 12);
11961 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11962 int lane = 0;
11963 int i;
11964
11965 NYI_assert (29, 24, 0x0D);
11966 NYI_assert (22, 22, 0);
11967
11968 /* Compute the lane number first (using size), and then compute size. */
11969 LDn_STn_SINGLE_LANE_AND_SIZE ();
11970
11971 for (i = 0; i < nregs; i++)
11972 switch (size)
11973 {
11974 case 0:
11975 {
11976 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11977 aarch64_set_mem_u8 (cpu, address + i, val);
11978 break;
11979 }
11980
11981 case 1:
11982 {
11983 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11984 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11985 break;
11986 }
11987
11988 case 2:
11989 {
11990 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11991 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11992 break;
11993 }
11994
11995 case 3:
11996 {
11997 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11998 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11999 break;
12000 }
12001 }
12002 }
12003
12004 /* Load single structure into all lanes of N registers.
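 These are the replicate
forms LD1R/LD2R/LD3R/LD4R: each of the N registers receives a
single element, broadcast into every lane.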
*/ 12005 static void 12006 do_vec_LDnR (sim_cpu *cpu, uint64_t address) 12007 { 12008 /* instr[31] = 0 12009 instr[30] = element selector 0=>half, 1=>all elements 12010 instr[29,24] = 00 1101 12011 instr[23] = 0=>simple, 1=>post 12012 instr[22] = 1 12013 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1) 12014 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), 12015 11111 (immediate post inc) 12016 instr[15,14] = 11 12017 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1) 12018 instr[12] = 0 12019 instr[11,10] = element size 00=> byte(b), 01=> half(h), 12020 10=> word(s), 11=> double(d) 12021 instr[9,5] = address 12022 instr[4,0] = Vd */ 12023 12024 unsigned full = INSTR (30, 30); 12025 unsigned vd = INSTR (4, 0); 12026 unsigned size = INSTR (11, 10); 12027 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; 12028 int i, n; 12029 12030 NYI_assert (29, 24, 0x0D); 12031 NYI_assert (22, 22, 1); 12032 NYI_assert (15, 14, 3); 12033 NYI_assert (12, 12, 0); 12034 12035 for (n = 0; n < nregs; n++) 12036 switch (size) 12037 { 12038 case 0: 12039 { 12040 uint8_t val = aarch64_get_mem_u8 (cpu, address + n); 12041 for (i = 0; i < (full ? 16 : 8); i++) 12042 aarch64_set_vec_u8 (cpu, vd + n, i, val); 12043 break; 12044 } 12045 12046 case 1: 12047 { 12048 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2)); 12049 for (i = 0; i < (full ? 8 : 4); i++) 12050 aarch64_set_vec_u16 (cpu, vd + n, i, val); 12051 break; 12052 } 12053 12054 case 2: 12055 { 12056 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4)); 12057 for (i = 0; i < (full ? 4 : 2); i++) 12058 aarch64_set_vec_u32 (cpu, vd + n, i, val); 12059 break; 12060 } 12061 12062 case 3: 12063 { 12064 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8)); 12065 for (i = 0; i < (full ? 2 : 1); i++) 12066 aarch64_set_vec_u64 (cpu, vd + n, i, val); 12067 break; 12068 } 12069 12070 default: 12071 HALT_UNALLOC; 12072 } 12073 } 12074 12075 static void 12076 do_vec_load_store (sim_cpu *cpu) 12077 { 12078 /* {LD|ST}<N> {Vd..Vd+N}, vaddr 12079 12080 instr[31] = 0 12081 instr[30] = element selector 0=>half, 1=>all elements 12082 instr[29,25] = 00110 12083 instr[24] = 0=>multiple struct, 1=>single struct 12084 instr[23] = 0=>simple, 1=>post 12085 instr[22] = 0=>store, 1=>load 12086 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR) 12087 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP), 12088 11111 (immediate post inc) 12089 instr[15,12] = elements and destinations. 
eg for load:
12090 0000=>LD4 => load multiple 4-element to
12091 four consecutive registers
12092 0100=>LD3 => load multiple 3-element to
12093 three consecutive registers
12094 1000=>LD2 => load multiple 2-element to
12095 two consecutive registers
12096 0010=>LD1 => load multiple 1-element to
12097 four consecutive registers
12098 0110=>LD1 => load multiple 1-element to
12099 three consecutive registers
12100 1010=>LD1 => load multiple 1-element to
12101 two consecutive registers
12102 0111=>LD1 => load multiple 1-element to
12103 one register
12104 1100=>LD1R,LD2R
12105 1110=>LD3R,LD4R
12106 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12107 10=> word(s), 11=> double(d)
12108 instr[9,5] = Vn, can be SP
12109 instr[4,0] = Vd */
12110
12111 int single;
12112 int post;
12113 int load;
12114 unsigned vn;
12115 uint64_t address;
12116 int type;
12117
12118 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12119 HALT_NYI;
12120
12121 single = INSTR (24, 24);
12122 post = INSTR (23, 23);
12123 load = INSTR (22, 22);
12124 type = INSTR (15, 12);
12125 vn = INSTR (9, 5);
12126 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12127
12128 if (! single && INSTR (21, 21) != 0)
12129 HALT_UNALLOC;
12130
12131 if (post)
12132 {
12133 unsigned vm = INSTR (20, 16);
12134
12135 if (vm == R31)
12136 {
12137 unsigned sizeof_operation;
12138
12139 if (single)
12140 {
12141 if ((type >= 0) && (type <= 11))
12142 {
12143 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12144 switch (INSTR (15, 14))
12145 {
12146 case 0:
12147 sizeof_operation = nregs * 1;
12148 break;
12149 case 1:
12150 sizeof_operation = nregs * 2;
12151 break;
12152 case 2:
12153 if (INSTR (10, 10) == 0)
12154 sizeof_operation = nregs * 4;
12155 else
12156 sizeof_operation = nregs * 8;
12157 break;
12158 default:
12159 HALT_UNALLOC;
12160 }
12161 }
12162 else if (type == 0xC)
12163 {
12164 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12165 sizeof_operation <<= INSTR (11, 10);
12166 }
12167 else if (type == 0xE)
12168 {
12169 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12170 sizeof_operation <<= INSTR (11, 10);
12171 }
12172 else
12173 HALT_UNALLOC;
12174 }
12175 else
12176 {
12177 switch (type)
12178 {
12179 case 0: sizeof_operation = 32; break;
12180 case 4: sizeof_operation = 24; break;
12181 case 8: sizeof_operation = 16; break;
12182
12183 case 7:
12184 /* One register, immediate offset variant. */
12185 sizeof_operation = 8;
12186 break;
12187
12188 case 10:
12189 /* Two registers, immediate offset variant. */
12190 sizeof_operation = 16;
12191 break;
12192
12193 case 6:
12194 /* Three registers, immediate offset variant. */
12195 sizeof_operation = 24;
12196 break;
12197
12198 case 2:
12199 /* Four registers, immediate offset variant. */
12200 sizeof_operation = 32;
12201 break;
12202
12203 default:
12204 HALT_UNALLOC;
12205 }
12206
12207 if (INSTR (30, 30))
12208 sizeof_operation *= 2;
12209 }
12210
12211 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12212 }
12213 else
12214 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12215 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12216 }
12217 else
12218 {
12219 NYI_assert (20, 16, 0);
12220 }
12221
12222 if (single)
12223 {
12224 if (load)
12225 {
12226 if ((type >= 0) && (type <= 11))
12227 do_vec_LDn_single (cpu, address);
12228 else if ((type == 0xC) || (type == 0xE))
12229 do_vec_LDnR (cpu, address);
12230 else
12231 HALT_UNALLOC;
12232 return;
12233 }
12234
12235 /* Stores.
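 Only the single-lane ST1-ST4 forms exist; the
replicate (LDnR) encodings are load-only, so anything else is
unallocated.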
*/ 12236 if ((type >= 0) && (type <= 11)) 12237 { 12238 do_vec_STn_single (cpu, address); 12239 return; 12240 } 12241 12242 HALT_UNALLOC; 12243 } 12244 12245 if (load) 12246 { 12247 switch (type) 12248 { 12249 case 0: LD4 (cpu, address); return; 12250 case 4: LD3 (cpu, address); return; 12251 case 8: LD2 (cpu, address); return; 12252 case 2: LD1_4 (cpu, address); return; 12253 case 6: LD1_3 (cpu, address); return; 12254 case 10: LD1_2 (cpu, address); return; 12255 case 7: LD1_1 (cpu, address); return; 12256 12257 default: 12258 HALT_UNALLOC; 12259 } 12260 } 12261 12262 /* Stores. */ 12263 switch (type) 12264 { 12265 case 0: ST4 (cpu, address); return; 12266 case 4: ST3 (cpu, address); return; 12267 case 8: ST2 (cpu, address); return; 12268 case 2: ST1_4 (cpu, address); return; 12269 case 6: ST1_3 (cpu, address); return; 12270 case 10: ST1_2 (cpu, address); return; 12271 case 7: ST1_1 (cpu, address); return; 12272 default: 12273 HALT_UNALLOC; 12274 } 12275 } 12276 12277 static void 12278 dexLdSt (sim_cpu *cpu) 12279 { 12280 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); 12281 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 || 12282 group == GROUP_LDST_1100 || group == GROUP_LDST_1110 12283 bits [29,28:26] of a LS are the secondary dispatch vector. */ 12284 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu)); 12285 12286 switch (group2) 12287 { 12288 case LS_EXCL_000: 12289 dexLoadExclusive (cpu); return; 12290 12291 case LS_LIT_010: 12292 case LS_LIT_011: 12293 dexLoadLiteral (cpu); return; 12294 12295 case LS_OTHER_110: 12296 case LS_OTHER_111: 12297 dexLoadOther (cpu); return; 12298 12299 case LS_ADVSIMD_001: 12300 do_vec_load_store (cpu); return; 12301 12302 case LS_PAIR_100: 12303 dex_load_store_pair_gr (cpu); return; 12304 12305 case LS_PAIR_101: 12306 dex_load_store_pair_fp (cpu); return; 12307 12308 default: 12309 /* Should never reach here. */ 12310 HALT_NYI; 12311 } 12312 } 12313 12314 /* Specific decode and execute for group Data Processing Register. */ 12315 12316 static void 12317 dexLogicalShiftedRegister (sim_cpu *cpu) 12318 { 12319 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 12320 instr[30,29] = op 12321 instr[28:24] = 01010 12322 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR 12323 instr[21] = N 12324 instr[20,16] = Rm 12325 instr[15,10] = count : must be 0xxxxx for 32 bit 12326 instr[9,5] = Rn 12327 instr[4,0] = Rd */ 12328 12329 uint32_t size = INSTR (31, 31); 12330 Shift shiftType = INSTR (23, 22); 12331 uint32_t count = INSTR (15, 10); 12332 12333 /* 32 bit operations must have count[5] = 0. 12334 or else we have an UNALLOC. */ 12335 if (size == 0 && uimm (count, 5, 5)) 12336 HALT_UNALLOC; 12337 12338 /* Dispatch on size:op:N. 
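 E.g. ANDS x0, x1, x2 has
size = 1, op = 11 and N = 0, giving dispatch value 14
(ands64_shift); BICS is the same with N = 1, giving 15.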
*/
12339 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12340 {
12341 case 0: and32_shift (cpu, shiftType, count); return;
12342 case 1: bic32_shift (cpu, shiftType, count); return;
12343 case 2: orr32_shift (cpu, shiftType, count); return;
12344 case 3: orn32_shift (cpu, shiftType, count); return;
12345 case 4: eor32_shift (cpu, shiftType, count); return;
12346 case 5: eon32_shift (cpu, shiftType, count); return;
12347 case 6: ands32_shift (cpu, shiftType, count); return;
12348 case 7: bics32_shift (cpu, shiftType, count); return;
12349 case 8: and64_shift (cpu, shiftType, count); return;
12350 case 9: bic64_shift (cpu, shiftType, count); return;
12351 case 10: orr64_shift (cpu, shiftType, count); return;
12352 case 11: orn64_shift (cpu, shiftType, count); return;
12353 case 12: eor64_shift (cpu, shiftType, count); return;
12354 case 13: eon64_shift (cpu, shiftType, count); return;
12355 case 14: ands64_shift (cpu, shiftType, count); return;
12356 case 15: bics64_shift (cpu, shiftType, count); return;
12357 }
12358 }
12359
12360 /* 32 bit conditional select. */
12361 static void
12362 csel32 (sim_cpu *cpu, CondCode cc)
12363 {
12364 unsigned rm = INSTR (20, 16);
12365 unsigned rn = INSTR (9, 5);
12366 unsigned rd = INSTR (4, 0);
12367
12368 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12369 testConditionCode (cpu, cc)
12370 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12371 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12372 }
12373
12374 /* 64 bit conditional select. */
12375 static void
12376 csel64 (sim_cpu *cpu, CondCode cc)
12377 {
12378 unsigned rm = INSTR (20, 16);
12379 unsigned rn = INSTR (9, 5);
12380 unsigned rd = INSTR (4, 0);
12381
12382 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12383 testConditionCode (cpu, cc)
12384 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12385 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12386 }
12387
12388 /* 32 bit conditional increment. */
12389 static void
12390 csinc32 (sim_cpu *cpu, CondCode cc)
12391 {
12392 unsigned rm = INSTR (20, 16);
12393 unsigned rn = INSTR (9, 5);
12394 unsigned rd = INSTR (4, 0);
12395
12396 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12397 testConditionCode (cpu, cc)
12398 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12399 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12400 }
12401
12402 /* 64 bit conditional increment. */
12403 static void
12404 csinc64 (sim_cpu *cpu, CondCode cc)
12405 {
12406 unsigned rm = INSTR (20, 16);
12407 unsigned rn = INSTR (9, 5);
12408 unsigned rd = INSTR (4, 0);
12409
12410 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12411 testConditionCode (cpu, cc)
12412 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12413 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12414 }
12415
12416 /* 32 bit conditional invert. */
12417 static void
12418 csinv32 (sim_cpu *cpu, CondCode cc)
12419 {
12420 unsigned rm = INSTR (20, 16);
12421 unsigned rn = INSTR (9, 5);
12422 unsigned rd = INSTR (4, 0);
12423
12424 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12425 testConditionCode (cpu, cc)
12426 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12427 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12428 }
12429
12430 /* 64 bit conditional invert. */
12431 static void
12432 csinv64 (sim_cpu *cpu, CondCode cc)
12433 {
12434 unsigned rm = INSTR (20, 16);
12435 unsigned rn = INSTR (9, 5);
12436 unsigned rd = INSTR (4, 0);
12437
12438 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12439 testConditionCode (cpu, cc)
12440 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12441 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12442 }
12443
12444 /* The conditional-select operations above and below differ only in
what they do to Rm when the condition fails.
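*/

/* A sketch of the common pattern (an illustrative helper, not used
   by the decode routines): CSNEG is just CSINV plus one, i.e. a
   two's-complement negation of Rm.  */

static inline uint32_t
example_cond_select32 (uint32_t rn_val, uint32_t rm_val,
                       int cond_holds, int invert, int increment)
{
  uint32_t other = invert ? ~rm_val : rm_val;

  return cond_holds ? rn_val : other + (increment ? 1 : 0);
}

/* 32 bit conditional negate.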
*/
12445 static void
12446 csneg32 (sim_cpu *cpu, CondCode cc)
12447 {
12448 unsigned rm = INSTR (20, 16);
12449 unsigned rn = INSTR (9, 5);
12450 unsigned rd = INSTR (4, 0);
12451
12452 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12453 testConditionCode (cpu, cc)
12454 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12455 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12456 }
12457
12458 /* 64 bit conditional negate. */
12459 static void
12460 csneg64 (sim_cpu *cpu, CondCode cc)
12461 {
12462 unsigned rm = INSTR (20, 16);
12463 unsigned rn = INSTR (9, 5);
12464 unsigned rd = INSTR (4, 0);
12465
12466 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12467 testConditionCode (cpu, cc)
12468 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12469 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12470 }
12471
12472 static void
12473 dexCondSelect (sim_cpu *cpu)
12474 {
12475 /* instr[28,21] = 11011011
12476 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12477 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12478 100 ==> CSINV, 101 ==> CSNEG,
12479 _1_ ==> UNALLOC
12480 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12481 instr[15,12] = cond
12482 instr[11,10] = op2 : 0x ==> ok, 1x ==> UNALLOC. */
12483
12484 CondCode cc = INSTR (15, 12);
12485 uint32_t S = INSTR (29, 29);
12486 uint32_t op2 = INSTR (11, 10);
12487
12488 if (S == 1)
12489 HALT_UNALLOC;
12490
12491 if (op2 & 0x2)
12492 HALT_UNALLOC;
12493
12494 switch ((INSTR (31, 30) << 1) | op2)
12495 {
12496 case 0: csel32 (cpu, cc); return;
12497 case 1: csinc32 (cpu, cc); return;
12498 case 2: csinv32 (cpu, cc); return;
12499 case 3: csneg32 (cpu, cc); return;
12500 case 4: csel64 (cpu, cc); return;
12501 case 5: csinc64 (cpu, cc); return;
12502 case 6: csinv64 (cpu, cc); return;
12503 case 7: csneg64 (cpu, cc); return;
12504 }
12505 }
12506
12507 /* Some helpers for counting leading 1 or 0 bits. */
12508
12509 /* Counts the number of leading bits which are the same
12510 in a 32 bit value; the result is in the range 1 to 32. */
12511 static uint32_t
12512 leading32 (uint32_t value)
12513 {
12514 int32_t mask = 0xffff0000;
12515 uint32_t count = 16; /* Counts number of bits set in mask. */
12516 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12517 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12518
12519 while (lo + 1 < hi)
12520 {
12521 int32_t test = (value & mask);
12522
12523 if (test == 0 || test == mask)
12524 {
12525 lo = count;
12526 count = (lo + hi) / 2;
12527 mask >>= (count - lo);
12528 }
12529 else
12530 {
12531 hi = count;
12532 count = (lo + hi) / 2;
12533 mask <<= hi - count;
12534 }
12535 }
12536
12537 if (lo != hi)
12538 {
12539 int32_t test;
12540
12541 mask >>= 1;
12542 test = (value & mask);
12543
12544 if (test == 0 || test == mask)
12545 count = hi;
12546 else
12547 count = lo;
12548 }
12549
12550 return count;
12551 }
12552
12553 /* Counts the number of leading bits which are the same
12554 in a 64 bit value; the result is in the range 1 to 64. */
12555 static uint64_t
12556 leading64 (uint64_t value)
12557 {
12558 int64_t mask = 0xffffffff00000000LL;
12559 uint64_t count = 32; /* Counts number of bits set in mask. */
12560 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12561 uint64_t hi = 64; /* Upper bound for number of sign bits.
*/
12562
12563 while (lo + 1 < hi)
12564 {
12565 int64_t test = (value & mask);
12566
12567 if (test == 0 || test == mask)
12568 {
12569 lo = count;
12570 count = (lo + hi) / 2;
12571 mask >>= (count - lo);
12572 }
12573 else
12574 {
12575 hi = count;
12576 count = (lo + hi) / 2;
12577 mask <<= hi - count;
12578 }
12579 }
12580
12581 if (lo != hi)
12582 {
12583 int64_t test;
12584
12585 mask >>= 1;
12586 test = (value & mask);
12587
12588 if (test == 0 || test == mask)
12589 count = hi;
12590 else
12591 count = lo;
12592 }
12593
12594 return count;
12595 }
12596
12597 /* Bit operations. */
12598 /* N.B. register args may not be SP. */
12599
12600 /* 32 bit count leading sign bits. */
12601 static void
12602 cls32 (sim_cpu *cpu)
12603 {
12604 unsigned rn = INSTR (9, 5);
12605 unsigned rd = INSTR (4, 0);
12606
12607 /* N.B. the result needs to exclude the leading bit. */
12608 aarch64_set_reg_u64
12609 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12610 }
12611
12612 /* 64 bit count leading sign bits. */
12613 static void
12614 cls64 (sim_cpu *cpu)
12615 {
12616 unsigned rn = INSTR (9, 5);
12617 unsigned rd = INSTR (4, 0);
12618
12619 /* N.B. the result needs to exclude the leading bit. */
12620 aarch64_set_reg_u64
12621 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12622 }
12623
12624 /* 32 bit count leading zero bits. */
12625 static void
12626 clz32 (sim_cpu *cpu)
12627 {
12628 unsigned rn = INSTR (9, 5);
12629 unsigned rd = INSTR (4, 0);
12630 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12631
12632 /* If the sign (top) bit is set then the count is 0. */
12633 if (pick32 (value, 31, 31))
12634 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12635 else
12636 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12637 }
12638
12639 /* 64 bit count leading zero bits. */
12640 static void
12641 clz64 (sim_cpu *cpu)
12642 {
12643 unsigned rn = INSTR (9, 5);
12644 unsigned rd = INSTR (4, 0);
12645 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12646
12647 /* If the sign (top) bit is set then the count is 0. */
12648 if (pick64 (value, 63, 63))
12649 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12650 else
12651 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12652 }
12653
12654 /* 32 bit reverse bits. */
12655 static void
12656 rbit32 (sim_cpu *cpu)
12657 {
12658 unsigned rn = INSTR (9, 5);
12659 unsigned rd = INSTR (4, 0);
12660 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12661 uint32_t result = 0;
12662 int i;
12663
12664 for (i = 0; i < 32; i++)
12665 {
12666 result <<= 1;
12667 result |= (value & 1);
12668 value >>= 1;
12669 }
12670 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12671 }
12672
12673 /* 64 bit reverse bits. */
12674 static void
12675 rbit64 (sim_cpu *cpu)
12676 {
12677 unsigned rn = INSTR (9, 5);
12678 unsigned rd = INSTR (4, 0);
12679 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12680 uint64_t result = 0;
12681 int i;
12682
12683 for (i = 0; i < 64; i++)
12684 {
12685 result <<= 1;
12686 result |= (value & 1UL);
12687 value >>= 1;
12688 }
12689 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12690 }
12691
12692 /* The byte-reverse loops below peel off one byte at a time.
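*/

/* A branch-free alternative for the 32-bit byte reverse (a sketch,
   not used by the simulator) swaps adjacent bytes and then the two
   half words:  */

static inline uint32_t
example_bswap32 (uint32_t value)
{
  value = ((value & 0x00ff00ffU) << 8) | ((value >> 8) & 0x00ff00ffU);

  return (value << 16) | (value >> 16);
}

/* 32 bit reverse bytes.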
*/
12693 static void
12694 rev32 (sim_cpu *cpu)
12695 {
12696 unsigned rn = INSTR (9, 5);
12697 unsigned rd = INSTR (4, 0);
12698 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12699 uint32_t result = 0;
12700 int i;
12701
12702 for (i = 0; i < 4; i++)
12703 {
12704 result <<= 8;
12705 result |= (value & 0xff);
12706 value >>= 8;
12707 }
12708 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12709 }
12710
12711 /* 64 bit reverse bytes. */
12712 static void
12713 rev64 (sim_cpu *cpu)
12714 {
12715 unsigned rn = INSTR (9, 5);
12716 unsigned rd = INSTR (4, 0);
12717 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12718 uint64_t result = 0;
12719 int i;
12720
12721 for (i = 0; i < 8; i++)
12722 {
12723 result <<= 8;
12724 result |= (value & 0xffULL);
12725 value >>= 8;
12726 }
12727 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12728 }
12729
12730 /* 32 bit reverse shorts. */
12731 /* N.B. this reverses the order of the bytes in each half word. */
12732 static void
12733 revh32 (sim_cpu *cpu)
12734 {
12735 unsigned rn = INSTR (9, 5);
12736 unsigned rd = INSTR (4, 0);
12737 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12738 uint32_t result = 0;
12739 int i;
12740
12741 for (i = 0; i < 2; i++)
12742 {
12743 result <<= 8;
12744 result |= (value & 0x00ff00ff);
12745 value >>= 8;
12746 }
12747 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12748 }
12749
12750 /* 64 bit reverse shorts. */
12751 /* N.B. this reverses the order of the bytes in each half word. */
12752 static void
12753 revh64 (sim_cpu *cpu)
12754 {
12755 unsigned rn = INSTR (9, 5);
12756 unsigned rd = INSTR (4, 0);
12757 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12758 uint64_t result = 0;
12759 int i;
12760
12761 for (i = 0; i < 2; i++)
12762 {
12763 result <<= 8;
12764 result |= (value & 0x00ff00ff00ff00ffULL);
12765 value >>= 8;
12766 }
12767 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12768 }
12769
12770 static void
12771 dexDataProc1Source (sim_cpu *cpu)
12772 {
12773 /* instr[30] = 1
12774 instr[28,21] = 11010110
12775 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12776 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12777 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12778 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12779 000010 ==> REV/REV32, 000011 ==> REV (64 bit only),
12780 000100 ==> CLZ, 000101 ==> CLS
12781 ow ==> UNALLOC
12782 instr[9,5] = rn : may not be SP
12783 instr[4,0] = rd : may not be SP. */
12784
12785 uint32_t S = INSTR (29, 29);
12786 uint32_t opcode2 = INSTR (20, 16);
12787 uint32_t opcode = INSTR (15, 10);
12788 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12789
12790 if (S == 1)
12791 HALT_UNALLOC;
12792
12793 if (opcode2 != 0)
12794 HALT_UNALLOC;
12795
12796 if (opcode & 0x38)
12797 HALT_UNALLOC;
12798
12799 switch (dispatch)
12800 {
12801 case 0: rbit32 (cpu); return;
12802 case 1: revh32 (cpu); return;
12803 case 2: rev32 (cpu); return;
12804 case 4: clz32 (cpu); return;
12805 case 5: cls32 (cpu); return;
12806 case 8: rbit64 (cpu); return;
12807 case 9: revh64 (cpu); return;
12808 case 10: rev32 (cpu); return; /* REV32. */
12809 case 11: rev64 (cpu); return;
12810 case 12: clz64 (cpu); return;
12811 case 13: cls64 (cpu); return;
12812 default: HALT_UNALLOC;
12813 }
12814 }
12815
12816 /* Variable shift.
12817 Shifts by count supplied in register.
12818 N.B. register args may not be SP.
12819 These all use the shifted auxiliary function for
12820 simplicity and clarity.
/* Writing the actual shift inline would avoid a branch and so be
   faster, but would also necessitate getting the signs right.  */

/* 32 bit arithmetic shift right.  */
static void
asrv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
                (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
}

/* 64 bit arithmetic shift right.  */
static void
asrv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
                (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
}

/* 32 bit logical shift left.  */
static void
lslv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
                (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
}

/* 64 bit logical shift left.  */
static void
lslv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
                (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
}

/* 32 bit logical shift right.  */
static void
lsrv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
                (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
}

/* 64 bit logical shift right.  */
static void
lsrv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
                (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
}

/* 32 bit rotate right.  */
static void
rorv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
                (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
}

/* 64 bit rotate right.  */
static void
rorv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
                (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
}


/* Divide.  */

/* 32 bit signed divide.  */
static void
sdiv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* N.B. the pseudo-code does the divide using 64 bit data.  */
  /* TODO: check that this rounds towards zero as required.  */
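  /* (For what it is worth, C99 and later require integer division to
     truncate towards zero -- e.g. -7 / 2 == -3 -- so the host division
     below should already satisfy the TODO above.)  */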
  int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
  int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);

  aarch64_set_reg_s64 (cpu, rd, NO_SP,
                       divisor ? ((int32_t) (dividend / divisor)) : 0);
}

/* 64 bit signed divide.  */
static void
sdiv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* TODO: check that this rounds towards zero as required.  */
  int64_t dividend = aarch64_get_reg_s64 (cpu, rn, NO_SP);
  int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
  int64_t result;

  if (divisor == 0)
    result = 0;
  else if (divisor == -1 && dividend == (int64_t) (1ULL << 63))
    /* The architecture wraps INT64_MIN / -1 back to INT64_MIN;
       performing this division on the host would overflow and
       may trap.  */
    result = dividend;
  else
    result = dividend / divisor;

  aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
}

/* 32 bit unsigned divide.  */
static void
udiv32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* N.B. the pseudo-code does the divide using 64 bit data.  */
  uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
  uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);

  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       divisor ? (uint32_t) (dividend / divisor) : 0);
}

/* 64 bit unsigned divide.  */
static void
udiv64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* TODO: check that this rounds towards zero as required.  */
  uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);

  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
}

static void
dexDataProc2Source (sim_cpu *cpu)
{
  /* assert instr[30] == 0
     instr[28,21] == 11010110
     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[29]    = S : 0 ==> ok, 1 ==> UNALLOC
     instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
                             001000 ==> LSLV, 001001 ==> LSRV
                             001010 ==> ASRV, 001011 ==> RORV
                             ow ==> UNALLOC.  */

  uint32_t dispatch;
  uint32_t S = INSTR (29, 29);
  uint32_t opcode = INSTR (15, 10);

  if (S == 1)
    HALT_UNALLOC;

  if (opcode & 0x34)
    HALT_UNALLOC;

  dispatch = (  (INSTR (31, 31) << 3)
              | (uimm (opcode, 3, 3) << 2)
              | uimm (opcode, 1, 0));
  switch (dispatch)
    {
    case 2:  udiv32 (cpu); return;
    case 3:  sdiv32 (cpu); return;
    case 4:  lslv32 (cpu); return;
    case 5:  lsrv32 (cpu); return;
    case 6:  asrv32 (cpu); return;
    case 7:  rorv32 (cpu); return;
    case 10: udiv64 (cpu); return;
    case 11: sdiv64 (cpu); return;
    case 12: lslv64 (cpu); return;
    case 13: lsrv64 (cpu); return;
    case 14: asrv64 (cpu); return;
    case 15: rorv64 (cpu); return;
    default: HALT_UNALLOC;
    }
}


/* Multiply.  */

/* 32 bit multiply and add.  */
static void
madd32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, ra, NO_SP)
                       + aarch64_get_reg_u32 (cpu, rn, NO_SP)
                       * aarch64_get_reg_u32 (cpu, rm, NO_SP));
}
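/* A small worked example of the wrap-around semantics above (values
   invented for illustration): with ra = 1, rn = 0xffffffff and rm = 2,
   the 32 bit multiply-add computes 1 + 0xfffffffe = 0xffffffff modulo
   2^32, and the result is zero-extended into the 64 bit destination.  */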
/* 64 bit multiply and add.  */
static void
madd64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, ra, NO_SP)
                       + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
                          * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
}

/* 32 bit multiply and sub.  */
static void
msub32 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, ra, NO_SP)
                       - aarch64_get_reg_u32 (cpu, rn, NO_SP)
                       * aarch64_get_reg_u32 (cpu, rm, NO_SP));
}

/* 64 bit multiply and sub.  */
static void
msub64 (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, ra, NO_SP)
                       - aarch64_get_reg_u64 (cpu, rn, NO_SP)
                       * aarch64_get_reg_u64 (cpu, rm, NO_SP));
}

/* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit.  */
static void
smaddl (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* N.B. we need to multiply the signed 32 bit values in rn, rm to
     obtain a 64 bit product.  */
  aarch64_set_reg_s64
    (cpu, rd, NO_SP,
     aarch64_get_reg_s64 (cpu, ra, NO_SP)
     + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
     * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
}

/* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
static void
smsubl (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  /* N.B. we need to multiply the signed 32 bit values in rn, rm to
     obtain a 64 bit product.  */
  aarch64_set_reg_s64
    (cpu, rd, NO_SP,
     aarch64_get_reg_s64 (cpu, ra, NO_SP)
     - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
     * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
}

/* Integer Multiply/Divide.  */

/* First some macros and a helper function.  */
/* Macros to test or access elements of 64 bit words.  */

/* Mask used to access lo 32 bits of 64 bit unsigned int.  */
#define LOW_WORD_MASK ((1ULL << 32) - 1)
/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define highWordToU64(_value_u64) ((_value_u64) >> 32)

/* Offset of sign bit in 64 bit signed integer.  */
#define SIGN_SHIFT_U64 63
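/* To sanity-check the sign helpers below with a concrete value (chosen
   purely for illustration): for _value_u64 = 0xffffffffffffff9c (-100
   as a signed 64 bit int), isSignSetU64 is non-zero and signOfU64
   evaluates to 1 + (1 * -2) = -1; for +100 it evaluates to
   1 + (0 * -2) = +1.  */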
/* The sign bit itself -- also identifies the minimum negative int value.  */
#define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
/* Return true if a 64 bit signed int presented as an unsigned int is the
   most negative value.  */
#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
/* Return 1L or -1L according to whether a 64 bit signed int presented as
   an unsigned int has its sign bit set or not.  */
#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
/* Clear the sign bit of a 64 bit signed int presented as an unsigned int.  */
#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)

/* Multiply two 64 bit ints and return the hi 64 bits of the 128 bit
   product.  Schoolbook decomposition: with a = a_hi * 2^32 + a_lo and
   b = b_hi * 2^32 + b_lo, the product is a_hi * b_hi * 2^64
   + (a_hi * b_lo + a_lo * b_hi) * 2^32 + a_lo * b_lo.  */

static uint64_t
mul64hi (uint64_t value1, uint64_t value2)
{
  uint64_t resultmid1;
  uint64_t result;
  uint64_t value1_lo = lowWordToU64 (value1);
  uint64_t value1_hi = highWordToU64 (value1);
  uint64_t value2_lo = lowWordToU64 (value2);
  uint64_t value2_hi = highWordToU64 (value2);

  /* Cross-multiply and collect results.  */
  uint64_t xproductlo = value1_lo * value2_lo;
  uint64_t xproductmid1 = value1_lo * value2_hi;
  uint64_t xproductmid2 = value1_hi * value2_lo;
  uint64_t xproducthi = value1_hi * value2_hi;
  uint64_t carry = 0;
  /* Start accumulating 64 bit results.  */
  /* Drop bottom half of lowest cross-product.  */
  uint64_t resultmid = xproductlo >> 32;
  /* Add in middle products.  */
  resultmid = resultmid + xproductmid1;

  /* Check for overflow.  */
  if (resultmid < xproductmid1)
    /* Carry over 1 into top cross-product.  */
    carry++;

  resultmid1 = resultmid + xproductmid2;

  /* Check for overflow.  */
  if (resultmid1 < xproductmid2)
    /* Carry over 1 into top cross-product.  */
    carry++;

  /* Drop lowest 32 bits of middle cross-product.  */
  result = resultmid1 >> 32;
  /* Move carry bit to just above middle cross-product highest bit.  */
  carry = carry << 32;

  /* Add the top cross-product and any carry.  */
  result += xproducthi + carry;

  return result;
}

/* Signed multiply high, source, source2 :
   64 bit, dest <-- high 64-bit of result.  */
static void
smulh (sim_cpu *cpu)
{
  uint64_t uresult;
  int64_t result;
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  GReg ra = INSTR (14, 10);
  int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  uint64_t uvalue1;
  uint64_t uvalue2;
  int negate = 0;

  if (ra != R31)
    HALT_UNALLOC;

  /* Convert to unsigned and use the unsigned mul64hi routine,
     then fix the sign up afterwards.  */
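  /* Rationale for the fix-up (standard two's complement identity):
     negating a 128 bit product P sets the high half to ~hi(P), plus 1
     only when lo(P) == 0, since -P == ~P + 1 and the +1 carries into
     the high half only when the low half is all zeroes.  */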
  if (value1 < 0)
    {
      negate = !negate;
      uvalue1 = -value1;
    }
  else
    {
      uvalue1 = value1;
    }

  if (value2 < 0)
    {
      negate = !negate;
      uvalue2 = -value2;
    }
  else
    {
      uvalue2 = value2;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);

  uresult = mul64hi (uvalue1, uvalue2);
  result = uresult;

  if (negate)
    {
      /* Multiply 128-bit result by -1, which means highpart gets inverted,
         and has carry in added only if low part is 0.  */
      result = ~result;
      if ((uvalue1 * uvalue2) == 0)
        result += 1;
    }

  aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
}

/* Unsigned multiply add long -- source, source2 :
   32 bit, source3 : 64 bit.  */
static void
umaddl (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
     obtain a 64 bit product.  */
  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     aarch64_get_reg_u64 (cpu, ra, NO_SP)
     + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
     * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
}

/* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
static void
umsubl (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned ra = INSTR (14, 10);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
     obtain a 64 bit product.  */
  aarch64_set_reg_u64
    (cpu, rd, NO_SP,
     aarch64_get_reg_u64 (cpu, ra, NO_SP)
     - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
     * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
}

/* Unsigned multiply high, source, source2 :
   64 bit, dest <-- high 64-bit of result.  */
static void
umulh (sim_cpu *cpu)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  GReg ra = INSTR (14, 10);

  if (ra != R31)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
                                aarch64_get_reg_u64 (cpu, rm, NO_SP)));
}

static void
dexDataProc3Source (sim_cpu *cpu)
{
  /* assert instr[28,24] == 11011.  */
  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
     instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
     instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
     instr[15]    = o0 : 0/1 ==> ok
     instr[23,21]:instr[15] ==> op :
                             0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
                             0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
                             0100 ==> SMULH, (64 bit only)
                             1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
                             1100 ==> UMULH (64 bit only)
                             ow ==> UNALLOC.  */
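  /* Example dispatch computation (derived from the table above):
     SMULH has op31 == 010 and o0 == 0, so dispatch == (2 << 1) | 0 == 4;
     UMSUBL has op31 == 101 and o0 == 1, giving (5 << 1) | 1 == 11.  */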
  uint32_t dispatch;
  uint32_t size = INSTR (31, 31);
  uint32_t op54 = INSTR (30, 29);
  uint32_t op31 = INSTR (23, 21);
  uint32_t o0 = INSTR (15, 15);

  if (op54 != 0)
    HALT_UNALLOC;

  if (size == 0)
    {
      if (op31 != 0)
        HALT_UNALLOC;

      if (o0 == 0)
        madd32 (cpu);
      else
        msub32 (cpu);
      return;
    }

  dispatch = (op31 << 1) | o0;

  switch (dispatch)
    {
    case 0:  madd64 (cpu); return;
    case 1:  msub64 (cpu); return;
    case 2:  smaddl (cpu); return;
    case 3:  smsubl (cpu); return;
    case 4:  smulh (cpu); return;
    case 10: umaddl (cpu); return;
    case 11: umsubl (cpu); return;
    case 12: umulh (cpu); return;
    default: HALT_UNALLOC;
    }
}

static void
dexDPReg (sim_cpu *cpu)
{
  /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert  group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
     bits [28:24:21] of a DPReg are the secondary dispatch vector.  */
  uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));

  switch (group2)
    {
    case DPREG_LOG_000:
    case DPREG_LOG_001:
      dexLogicalShiftedRegister (cpu); return;

    case DPREG_ADDSHF_010:
      dexAddSubtractShiftedRegister (cpu); return;

    case DPREG_ADDEXT_011:
      dexAddSubtractExtendedRegister (cpu); return;

    case DPREG_ADDCOND_100:
      {
        /* This set bundles a variety of different operations.  */
        /* Check for:  */
        /* 1) add/sub with carry.  */
        uint32_t mask1 = 0x1FE00000U;
        uint32_t val1  = 0x1A000000U;
        /* 2) cond compare register/immediate.  */
        uint32_t mask2 = 0x1FE00000U;
        uint32_t val2  = 0x1A400000U;
        /* 3) cond select.  */
        uint32_t mask3 = 0x1FE00000U;
        uint32_t val3  = 0x1A800000U;
        /* 4) data proc 1/2 source.  */
        uint32_t mask4 = 0x1FE00000U;
        uint32_t val4  = 0x1AC00000U;

        if ((aarch64_get_instr (cpu) & mask1) == val1)
          dexAddSubtractWithCarry (cpu);

        else if ((aarch64_get_instr (cpu) & mask2) == val2)
          CondCompare (cpu);

        else if ((aarch64_get_instr (cpu) & mask3) == val3)
          dexCondSelect (cpu);

        else if ((aarch64_get_instr (cpu) & mask4) == val4)
          {
            /* Bit 30 is clear for data proc 2 source
               and set for data proc 1 source.  */
            if (aarch64_get_instr (cpu) & (1U << 30))
              dexDataProc1Source (cpu);
            else
              dexDataProc2Source (cpu);
          }

        else
          /* Should not reach here.  */
          HALT_NYI;

        return;
      }

    case DPREG_3SRC_110:
      dexDataProc3Source (cpu); return;

    case DPREG_UNALLOC_101:
      HALT_UNALLOC;

    case DPREG_3SRC_111:
      dexDataProc3Source (cpu); return;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}

/* Unconditional Branch immediate.
   Offset is a PC-relative byte offset in the range +/- 128MiB.
   The offset is assumed to be raw from the decode, i.e. the
   simulator is expected to scale it from a word offset to a
   byte offset.  */

/* Unconditional branch.  */
static void
buc (sim_cpu *cpu, int32_t offset)
{
  aarch64_set_next_PC_by_offset (cpu, offset);
}

static unsigned stack_depth = 0;
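/* For example, a BL at PC 0x1000 with an (already scaled) byte offset
   of 0x10 stores the return address 0x1004 in LR and sets the next PC
   to 0x1010 -- this is the behaviour bl below implements via
   aarch64_save_LR and aarch64_set_next_PC_by_offset.  */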
/* Unconditional branch and link -- writes return PC to LR.  */
static void
bl (sim_cpu *cpu, int32_t offset)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_save_LR (cpu);
  aarch64_set_next_PC_by_offset (cpu, offset);

  if (TRACE_BRANCH_P (cpu))
    {
      ++ stack_depth;
      TRACE_BRANCH (cpu,
                    " %*scall %" PRIx64 " [%s]"
                    " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
                    stack_depth, " ", aarch64_get_next_PC (cpu),
                    aarch64_get_func (CPU_STATE (cpu),
                                      aarch64_get_next_PC (cpu)),
                    aarch64_get_reg_u64 (cpu, 0, NO_SP),
                    aarch64_get_reg_u64 (cpu, 1, NO_SP),
                    aarch64_get_reg_u64 (cpu, 2, NO_SP));
    }
}

/* Unconditional Branch register.
   Branch/return address is in source register.  */

/* Unconditional branch.  */
static void
br (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
}

/* Unconditional branch and link -- writes return PC to LR.  */
static void
blr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The pseudo code in the spec says we update LR before fetching
     the value from rn.  */
  aarch64_save_LR (cpu);
  aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));

  if (TRACE_BRANCH_P (cpu))
    {
      ++ stack_depth;
      TRACE_BRANCH (cpu,
                    " %*scall %" PRIx64 " [%s]"
                    " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
                    stack_depth, " ", aarch64_get_next_PC (cpu),
                    aarch64_get_func (CPU_STATE (cpu),
                                      aarch64_get_next_PC (cpu)),
                    aarch64_get_reg_u64 (cpu, 0, NO_SP),
                    aarch64_get_reg_u64 (cpu, 1, NO_SP),
                    aarch64_get_reg_u64 (cpu, 2, NO_SP));
    }
}

/* Return -- the assembler will default the source to LR.  This is
   functionally equivalent to br but, presumably, unlike br it
   side-effects the branch predictor.  */
static void
ret (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (TRACE_BRANCH_P (cpu))
    {
      TRACE_BRANCH (cpu,
                    " %*sreturn [result: %" PRIx64 "]",
                    stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
      -- stack_depth;
    }
}

/* NOP -- we implement this and call it from the decode in case we
   want to intercept it later.  */

static void
nop (sim_cpu *cpu)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
}

/* Data synchronization barrier.  */

static void
dsb (sim_cpu *cpu)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
}

/* Data memory barrier.  */

static void
dmb (sim_cpu *cpu)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
}

/* Instruction synchronization barrier.  */

static void
isb (sim_cpu *cpu)
{
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
}

static void
dexBranchImmediate (sim_cpu *cpu)
{
  /* assert instr[30,26] == 00101
     instr[31]   ==> 0 == B, 1 == BL
     instr[25,0] == imm26 branch offset counted in words.  */
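  /* Example scaling (values invented): imm26 == 0x0000004 encodes a
     forward branch of 4 words, i.e. a byte offset of 16, while
     imm26 == 0x3ffffff sign-extends to -1 word, i.e. a byte offset
     of -4.  */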
  uint32_t top = INSTR (31, 31);
  /* We have a 26 bit signed word offset which we need to pass to the
     execute routine as a signed byte offset.  */
  int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;

  if (top)
    bl (cpu, offset);
  else
    buc (cpu, offset);
}

/* Control Flow.  */

/* Conditional branch.

   Offset is a PC-relative byte offset in the range +/- 1MiB.
   Pos (used by the test branches below) is a bit position in
   the range 0 .. 63.

   CC is a CondCode enum value as pulled out of the decode.

   N.B. any source register can only be Xn or Wn.  */

static void
bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
{
  /* The test returns TRUE if CC is met.  */
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (testConditionCode (cpu, cc))
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* 32 bit branch on register non-zero.  */
static void
cbnz32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* 64 bit branch on register non-zero.  */
static void
cbnz (sim_cpu *cpu, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* 32 bit branch on register zero.  */
static void
cbz32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* 64 bit branch on register zero.  */
static void
cbz (sim_cpu *cpu, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
    aarch64_set_next_PC_by_offset (cpu, offset);
}

/* Branch on register bit test non-zero -- one size fits all.  */
static void
tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
    aarch64_set_next_PC_by_offset (cpu, offset);
}
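/* For instance, TBNZ X3, #42, label sets b5 == 1 and b40 == 01010, so
   the decode reconstructs pos == (1 << 5) | 10 == 42 and tests bit 42
   of X3 -- see dexTestBranchImmediate below.  */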
/* Branch on register bit test zero -- one size fits all.  */
static void
tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
{
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
    aarch64_set_next_PC_by_offset (cpu, offset);
}

static void
dexCompareBranchImmediate (sim_cpu *cpu)
{
  /* instr[30,25] = 01 1010
     instr[31]    = size : 0 ==> 32, 1 ==> 64
     instr[24]    = op : 0 ==> CBZ, 1 ==> CBNZ
     instr[23,5]  = simm19 branch offset counted in words
     instr[4,0]   = rt  */

  uint32_t size = INSTR (31, 31);
  uint32_t op = INSTR (24, 24);
  int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;

  if (size == 0)
    {
      if (op == 0)
        cbz32 (cpu, offset);
      else
        cbnz32 (cpu, offset);
    }
  else
    {
      if (op == 0)
        cbz (cpu, offset);
      else
        cbnz (cpu, offset);
    }
}

static void
dexTestBranchImmediate (sim_cpu *cpu)
{
  /* instr[31]    = b5 : bit 5 of test bit idx
     instr[30,25] = 01 1011
     instr[24]    = op : 0 ==> TBZ, 1 ==> TBNZ
     instr[23,19] = b40 : bits 4 to 0 of test bit idx
     instr[18,5]  = simm14 : signed offset counted in words
     instr[4,0]   = uimm5  */

  uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
  int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;

  NYI_assert (30, 25, 0x1b);

  if (INSTR (24, 24) == 0)
    tbz (cpu, pos, offset);
  else
    tbnz (cpu, pos, offset);
}

static void
dexCondBranchImmediate (sim_cpu *cpu)
{
  /* instr[31,25] = 010 1010
     instr[24]    = op1
     instr[23,5]  = simm19 : signed offset counted in words
     instr[4]     = op0
     instr[3,0]   = cond
     op1:op0 == 00 ==> B.cond, ow ==> UNALLOC.  */

  int32_t offset;
  uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));

  NYI_assert (31, 25, 0x2a);

  if (op != 0)
    HALT_UNALLOC;

  offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;

  bcc (cpu, offset, INSTR (3, 0));
}

static void
dexBranchRegister (sim_cpu *cpu)
{
  /* instr[31,25] = 110 1011
     instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET,
                         4 ==> ERET, 5 ==> DRPS
     instr[20,16] = op2 : must be 11111
     instr[15,10] = op3 : must be 000000
     instr[4,0]   = op4 : must be 11111.  */

  uint32_t op = INSTR (24, 21);
  uint32_t op2 = INSTR (20, 16);
  uint32_t op3 = INSTR (15, 10);
  uint32_t op4 = INSTR (4, 0);

  NYI_assert (31, 25, 0x6b);

  if (op2 != 0x1F || op3 != 0 || op4 != 0)
    HALT_UNALLOC;

  if (op == 0)
    br (cpu);

  else if (op == 1)
    blr (cpu);

  else if (op == 2)
    ret (cpu);

  else
    {
      /* ERET and DRPS accept 0b11111 for rn = instr [4,0].  */
      /* Anything else is unallocated.  */
      uint32_t rn = INSTR (4, 0);

      if (rn != 0x1f)
        HALT_UNALLOC;

      if (op == 4 || op == 5)
        HALT_NYI;

      HALT_UNALLOC;
    }
}

/* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
   but this may not be available.  So instead we define the values we need
   here.  */
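/* In the Angel/semihosting convention (as relied upon by handle_halt
   below) the operation number is passed in W0 and X1 either holds an
   argument or points to a block of arguments; the result, if any, is
   returned in X0.  */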
#define AngelSVC_Reason_Open            0x01
#define AngelSVC_Reason_Close           0x02
#define AngelSVC_Reason_Write           0x05
#define AngelSVC_Reason_Read            0x06
#define AngelSVC_Reason_IsTTY           0x09
#define AngelSVC_Reason_Seek            0x0A
#define AngelSVC_Reason_FLen            0x0C
#define AngelSVC_Reason_Remove          0x0E
#define AngelSVC_Reason_Rename          0x0F
#define AngelSVC_Reason_Clock           0x10
#define AngelSVC_Reason_Time            0x11
#define AngelSVC_Reason_System          0x12
#define AngelSVC_Reason_Errno           0x13
#define AngelSVC_Reason_GetCmdLine      0x15
#define AngelSVC_Reason_HeapInfo        0x16
#define AngelSVC_Reason_ReportException 0x18
#define AngelSVC_Reason_Elapsed         0x30


static void
handle_halt (sim_cpu *cpu, uint32_t val)
{
  uint64_t result = 0;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (val != 0xf000)
    {
      TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  /* We have encountered an Angel SVC call.  See if we can process it.  */
  switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
    {
    case AngelSVC_Reason_HeapInfo:
      {
        /* Get the values.  */
        uint64_t stack_top = aarch64_get_stack_start (cpu);
        uint64_t heap_base = aarch64_get_heap_start (cpu);

        /* Get the pointer.  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        ptr = aarch64_get_mem_u64 (cpu, ptr);

        /* Fill in the memory block.  */
        /* Start addr of heap.  */
        aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
        /* End addr of heap.  */
        aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
        /* Lowest stack addr.  */
        aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
        /* Initial stack addr.  */
        aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);

        TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
      }
      break;

    case AngelSVC_Reason_Open:
      {
        /* Get the pointer.  */
        /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);  */
        /* FIXME: For now we just assume that we will only be asked
           to open the standard file descriptors.  */
        static int fd = 0;
        result = fd++;

        TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
      }
      break;

    case AngelSVC_Reason_Close:
      {
        uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
        result = 0;
      }
      break;

    case AngelSVC_Reason_Errno:
      result = 0;
      TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
      break;

    case AngelSVC_Reason_Clock:
      result =
#ifdef CLOCKS_PER_SEC
        (CLOCKS_PER_SEC >= 100)
        ? (clock () / (CLOCKS_PER_SEC / 100))
        : ((clock () * 100) / CLOCKS_PER_SEC)
#else
        /* Presume unix... clock() returns microseconds.  */
        (clock () / 10000)
#endif
        ;
      TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
      break;

    case AngelSVC_Reason_GetCmdLine:
      {
        /* Get the pointer.  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        ptr = aarch64_get_mem_u64 (cpu, ptr);

        /* FIXME: No command line for now.  */
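        /* Storing a zero at the buffer address leaves an empty,
           NUL-terminated string there, which is a safe stand-in until
           real command line passing is implemented.  */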
        aarch64_set_mem_u64 (cpu, ptr, 0);
        TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
      }
      break;

    case AngelSVC_Reason_IsTTY:
      result = 1;
      TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
      break;

    case AngelSVC_Reason_Write:
      {
        /* Get the pointer.  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        /* Get the write control block.  */
        uint64_t fd  = aarch64_get_mem_u64 (cpu, ptr);
        uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
        uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);

        TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
                       PRIx64 " on descriptor %" PRIx64,
                       len, buf, fd);

        if (len > 1280)
          {
            TRACE_SYSCALL (cpu,
                           " AngelSVC: Write: Suspiciously long write: %ld",
                           (long) len);
            sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                             sim_stopped, SIM_SIGBUS);
          }
        else if (fd == 1)
          {
            printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
          }
        else if (fd == 2)
          {
            TRACE (cpu, 0, "\n");
            sim_io_eprintf (CPU_STATE (cpu), "%.*s",
                            (int) len, aarch64_get_mem_ptr (cpu, buf));
            TRACE (cpu, 0, "\n");
          }
        else
          {
            TRACE_SYSCALL (cpu,
                           " AngelSVC: Write: Unexpected file handle: %d",
                           (int) fd);
            sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                             sim_stopped, SIM_SIGABRT);
          }
      }
      break;

    case AngelSVC_Reason_ReportException:
      {
        /* Get the pointer.  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        /* ptr = aarch64_get_mem_u64 (cpu, ptr);  */
        uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
        uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);

        TRACE_SYSCALL (cpu,
                       "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
                       type, state);

        if (type == 0x20026)  /* ADP_Stopped_ApplicationExit.  */
          sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                           sim_exited, state);
        else
          sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                           sim_stopped, SIM_SIGINT);
      }
      break;

    case AngelSVC_Reason_Read:
    case AngelSVC_Reason_FLen:
    case AngelSVC_Reason_Seek:
    case AngelSVC_Reason_Remove:
    case AngelSVC_Reason_Time:
    case AngelSVC_Reason_System:
    case AngelSVC_Reason_Rename:
    case AngelSVC_Reason_Elapsed:
    default:
      TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
                     aarch64_get_reg_u32 (cpu, 0, NO_SP));
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
}

static void
dexExcpnGen (sim_cpu *cpu)
{
  /* instr[31:24] = 11010100
     instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
                          010 ==> HLT, 101 ==> DBG GEN EXCPN
     instr[20,5]  = imm16
     instr[4,2]   = opc2 000 ==> OK, ow ==> UNALLOC
     instr[1,0]   = LL : discriminates opc  */

  uint32_t opc = INSTR (23, 21);
  uint32_t imm16 = INSTR (20, 5);
  uint32_t opc2 = INSTR (4, 2);
  uint32_t LL;

  NYI_assert (31, 24, 0xd4);

  if (opc2 != 0)
    HALT_UNALLOC;

  LL = INSTR (1, 0);

  /* We only implement HLT and BRK for now.  */
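  /* Per the field layout above, BRK #imm16 arrives here with
     opc == 001 and LL == 00, while HLT #imm16 has opc == 010 and
     LL == 00; the special value HLT 0xf000 is the Angel semihosting
     trap that handle_halt recognises.  */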
  if (opc == 1 && LL == 0)
    {
      TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
    }

  if (opc == 2 && LL == 0)
    handle_halt (cpu, imm16);

  else if (opc == 0 || opc == 5)
    HALT_NYI;

  else
    HALT_UNALLOC;
}

/* Stub for accessing system registers.  */

static uint64_t
system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
            unsigned crm, unsigned op2)
{
  if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
    /* DCZID_EL0 - the Data Cache Zero ID register.
       We do not support DC ZVA at the moment, so
       we return a value with the disable bit set.
       We implement support for the DCZID register since
       it is used by the C library's memset function.  */
    return ((uint64_t) 1) << 4;

  if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
    /* Cache Type Register.  */
    return 0x80008000UL;

  if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
    /* TPIDR_EL0 - thread pointer id.  */
    return aarch64_get_thread_id (cpu);

  if (op1 == 3 && crm == 4 && op2 == 0)
    return aarch64_get_FPCR (cpu);

  if (op1 == 3 && crm == 4 && op2 == 1)
    return aarch64_get_FPSR (cpu);

  if (op1 == 3 && crm == 2 && op2 == 0)
    return aarch64_get_CPSR (cpu);

  HALT_NYI;
}

static void
system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
            unsigned crm, unsigned op2, uint64_t val)
{
  if (op1 == 3 && crm == 4 && op2 == 0)
    aarch64_set_FPCR (cpu, val);

  else if (op1 == 3 && crm == 4 && op2 == 1)
    aarch64_set_FPSR (cpu, val);

  else if (op1 == 3 && crm == 2 && op2 == 0)
    aarch64_set_CPSR (cpu, val);

  else
    HALT_NYI;
}

static void
do_mrs (sim_cpu *cpu)
{
  /* instr[31,20] = 1101 0101 0011
     instr[19]    = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */
  unsigned sys_op0 = INSTR (19, 19) + 2;
  unsigned sys_op1 = INSTR (18, 16);
  unsigned sys_crn = INSTR (15, 12);
  unsigned sys_crm = INSTR (11, 8);
  unsigned sys_op2 = INSTR (7, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       system_get (cpu, sys_op0, sys_op1, sys_crn,
                                   sys_crm, sys_op2));
}

static void
do_MSR_immediate (sim_cpu *cpu)
{
  /* instr[31,19] = 1101 0101 0000 0
     instr[18,16] = op1
     instr[15,12] = 0100
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = 1 1111  */

  unsigned op1 = INSTR (18, 16);
  /* unsigned crm = INSTR (11, 8);  */
  unsigned op2 = INSTR (7, 5);

  NYI_assert (31, 19, 0x1AA0);
  NYI_assert (15, 12, 0x4);
  NYI_assert (4, 0, 0x1F);

  if (op1 == 0)
    {
      if (op2 == 5)
        HALT_NYI;              /* Set SPSel.  */
      else
        HALT_UNALLOC;
    }
  else if (op1 == 3)
    {
      if (op2 == 6)
        HALT_NYI;              /* Set DAIFset.  */
      else if (op2 == 7)
        HALT_NYI;              /* Set DAIFclr.  */
      else
        HALT_UNALLOC;
    }
  else
    HALT_UNALLOC;
}

static void
do_MSR_reg (sim_cpu *cpu)
{
  /* instr[31,20] = 1101 0101 0001
     instr[19]    = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */

  unsigned sys_op0 = INSTR (19, 19) + 2;
  unsigned sys_op1 = INSTR (18, 16);
  unsigned sys_crn = INSTR (15, 12);
  unsigned sys_crm = INSTR (11, 8);
  unsigned sys_op2 = INSTR (7, 5);
  unsigned rt = INSTR (4, 0);

  NYI_assert (31, 20, 0xD51);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
              aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

static void
do_SYS (sim_cpu *cpu)
{
  /* instr[31,19] = 1101 0101 0000 1
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */
  NYI_assert (31, 19, 0x1AA1);

  /* FIXME: For now we just silently accept system ops.  */
}

static void
dexSystem (sim_cpu *cpu)
{
  /* instr[31,22] = 1101 0101 00
     instr[21]    = L
     instr[20,19] = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = uimm5  */

  /* We are interested in HINT, DSB, DMB and ISB.

     Hint #0 encodes NOOP (this is the only hint we care about):
     L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111.
     We treat as a NOP every hint with CRm != 0000, or with
     CRm == 0000 and op2 == 000 or op2 > 101 -- i.e. everything
     except YIELD, WFE, WFI, SEV and SEVL.

     DSB, DMB, ISB are data synchronization barrier, data memory
     barrier and instruction synchronization barrier, respectively,
     where

     L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
     op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
     CRm<3:2> ==> domain, CRm<1:0> ==> types,
     domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
              10 ==> InnerShareable, 11 ==> FullSystem
     types : 01 ==> Reads, 10 ==> Writes,
             11 ==> All, 00 ==> All (domain == FullSystem).  */

  unsigned rt = INSTR (4, 0);

  NYI_assert (31, 22, 0x354);

  switch (INSTR (21, 12))
    {
    case 0x032:
      if (rt == 0x1F)
        {
          /* NOP has CRm != 0000 OR
             (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
          uint32_t crm = INSTR (11, 8);
          uint32_t op2 = INSTR (7, 5);

          if (crm != 0 || (op2 == 0 || op2 > 5))
            {
              /* Actually call the nop method so we can reimplement
                 it later.  */
              nop (cpu);
              return;
            }
        }
      HALT_NYI;

    case 0x033:
      {
        uint32_t op2 = INSTR (7, 5);

        switch (op2)
          {
          case 2: HALT_NYI;
          case 4: dsb (cpu); return;
          case 5: dmb (cpu); return;
          case 6: isb (cpu); return;
          default: HALT_UNALLOC;
          }
      }

    case 0x3B0:
    case 0x3B4:
    case 0x3BD:
      do_mrs (cpu);
      return;

    case 0x0B7:
      do_SYS (cpu);            /* DC is an alias of SYS.  */
      return;

    default:
      if (INSTR (21, 20) == 0x1)
        do_MSR_reg (cpu);
      else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
        do_MSR_immediate (cpu);
      else
        HALT_NYI;
      return;
    }
}

static void
dexBr (sim_cpu *cpu)
{
  /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert  group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
     bits [31,29] of a BrExSys are the secondary dispatch vector.  */
  uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));

  switch (group2)
    {
    case BR_IMM_000:
      return dexBranchImmediate (cpu);

    case BR_IMMCMP_001:
      /* Compare has bit 25 clear while test has it set.  */
      if (!INSTR (25, 25))
        dexCompareBranchImmediate (cpu);
      else
        dexTestBranchImmediate (cpu);
      return;

    case BR_IMMCOND_010:
      /* This is a conditional branch if bit 25 is clear otherwise
         unallocated.  */
      if (!INSTR (25, 25))
        dexCondBranchImmediate (cpu);
      else
        HALT_UNALLOC;
      return;

    case BR_UNALLOC_011:
      HALT_UNALLOC;

    case BR_IMM_100:
      dexBranchImmediate (cpu);
      return;

    case BR_IMMCMP_101:
      /* Compare has bit 25 clear while test has it set.  */
      if (!INSTR (25, 25))
        dexCompareBranchImmediate (cpu);
      else
        dexTestBranchImmediate (cpu);
      return;

    case BR_REG_110:
      /* Unconditional branch reg has bit 25 set.  */
      if (INSTR (25, 25))
        dexBranchRegister (cpu);

      /* This includes Excpn Gen, System and unalloc operations.
         We need to decode the Excpn Gen operation BRK so we can plant
         debugger entry points.
         Excpn Gen operations have instr [24] = 0.
         We need to decode at least one of the System operations, NOP,
         which is an alias for HINT #0.
         System operations have instr [24,22] = 100.  */
      else if (INSTR (24, 24) == 0)
        dexExcpnGen (cpu);

      else if (INSTR (24, 22) == 4)
        dexSystem (cpu);

      else
        HALT_UNALLOC;

      return;

    case BR_UNALLOC_111:
      HALT_UNALLOC;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}

static void
aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
{
  /* We need to check if gdb wants to break in here.  */
  /* checkBreak (cpu);  */

  uint64_t group = dispatchGroup (aarch64_get_instr (cpu));

  switch (group)
    {
    case GROUP_PSEUDO_0000:  dexPseudo (cpu); break;
    case GROUP_LDST_0100:    dexLdSt (cpu); break;
    case GROUP_DPREG_0101:   dexDPReg (cpu); break;
    case GROUP_LDST_0110:    dexLdSt (cpu); break;
    case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
    case GROUP_DPIMM_1000:   dexDPImm (cpu); break;
    case GROUP_DPIMM_1001:   dexDPImm (cpu); break;
    case GROUP_BREXSYS_1010: dexBr (cpu); break;
    case GROUP_BREXSYS_1011: dexBr (cpu); break;
    case GROUP_LDST_1100:    dexLdSt (cpu); break;
    case GROUP_DPREG_1101:   dexDPReg (cpu); break;
    case GROUP_LDST_1110:    dexLdSt (cpu); break;
    case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;

    case GROUP_UNALLOC_0001:
    case GROUP_UNALLOC_0010:
    case GROUP_UNALLOC_0011:
      HALT_UNALLOC;

    default:
      /* Should never reach here.  */
      HALT_NYI;
    }
}

static bfd_boolean
aarch64_step (sim_cpu *cpu)
{
  uint64_t pc = aarch64_get_PC (cpu);

  if (pc == TOP_LEVEL_RETURN_PC)
    return FALSE;

  aarch64_set_next_PC (cpu, pc + 4);

  /* Code is always little-endian.  */
  sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
                        & aarch64_get_instr (cpu), pc, 4);
  aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));

  TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
              aarch64_get_instr (cpu));
  TRACE_DISASM (cpu, pc);

  aarch64_decode_and_execute (cpu, pc);

  return TRUE;
}

void
aarch64_run (SIM_DESC sd)
{
  sim_cpu *cpu = STATE_CPU (sd, 0);

  while (aarch64_step (cpu))
    {
      aarch64_update_PC (cpu);

      if (sim_events_tick (sd))
        sim_events_process (sd);
    }

  sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
                   sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
}

void
aarch64_init (sim_cpu *cpu, uint64_t pc)
{
  uint64_t sp = aarch64_get_stack_start (cpu);

  /* Install SP, FP and PC and set LR to TOP_LEVEL_RETURN_PC
     so we can detect a top-level return.  */
  aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
  aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
  aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
  aarch64_set_next_PC (cpu, pc);
  aarch64_update_PC (cpu);
  aarch64_init_LIT_table ();
}