/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2023 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* This must come before any other includes.  */
#include "defs.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#include "sim-signal.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unallocated instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGILL);			\
    }									\
  while (0)

#define HALT_NYI							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unimplemented instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      if (! TRACE_ANY_P (cpu))						\
	sim_io_eprintf (CPU_STATE (cpu),				\
			"SIM Error: Unimplemented instruction: %#08x\n",\
			aarch64_get_instr (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGABRT);			\
    }									\
  while (0)

#define NYI_assert(HI, LO, EXPECTED)					\
  do									\
    {									\
      if (INSTR ((HI), (LO)) != (EXPECTED))				\
	HALT_NYI;							\
    }									\
  while (0)

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
	{
	case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
	case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
	case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
	case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
	case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
	default: return 0;
	}
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  Each case doubles
     the pattern and deliberately falls through to the next.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
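/* A worked example of the expansion above, computed directly from the
   algorithm (the field values are hypothetical, chosen for
   illustration):

     N = 0, imms (S) = 3, immr (R) = 0
       -> simd_size = 32, imm = 0xf, no rotation,
          replicated to 0x0000000f0000000f.

     N = 0, imms (S) = 3, immr (R) = 1
       -> simd_size = 32, imm = 0xf rotated right by 1 = 0x80000007,
          replicated to 0x8000000780000007.  */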
/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of int entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry,
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}
/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. the loaded word must be sign-extended to 64 bits; casting it
     back to uint32_t here would zero-extend instead.  */
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}
/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label.
   Offset is a signed 19 bit immediate count in words.
   Rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_s32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}
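/* N.B. the simm19 offset is a word count, as the offset * 4 in the
   address computations above shows: for example, a pc-relative load
   whose target label lies 16 bytes beyond the instruction would be
   decoded with offset = 4.  */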
/* Long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is either 16, 32, 64 or 128.
   The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the element shift is applied;
   when it is Unscaled the shift amount is zero.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
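/* Examples of the two helpers above, computed from their definitions
   (assuming ScaleShift64 == 3, i.e. log2 of the 8 byte element size):

     OPT_SCALE (3, 64, Scaled)   == 3 << 3 == 24 byte displacement
     OPT_SCALE (3, 64, Unscaled) == 3

     extend (0xffffffff, UXTW)   == 0x00000000ffffffff
     extend (0xffffffff, SXTW)   == 0xffffffffffffffff (i.e. -1).  */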
/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* An 8 bit load should only touch one byte of memory.  */
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
		       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}
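/* Writeback semantics of the _wb loads and stores in this file,
   illustrated with base register x2 == 0x1000 and offset == 8
   (the values follow directly from the code above):

     Pre          access 0x1008, x2 becomes 0x1008
     Post         access 0x1000, x2 becomes 0x1008
     NoWriteBack  access 0x1008, x2 unchanged.  */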
/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  The same applies to the register and extended
   register offset versions, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 32)));
}
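/* For the scaled immediate form above the 12-bit field is a count of
   elements, not bytes: LDR w0, [x1, #8] arrives here with offset == 2
   and the simulator forms x1 + (2 << ScaleShift32), i.e. x1 + 8
   (assuming ScaleShift32 == 2).  */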
/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u64 (cpu, address + displacement));
}
/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s8 (cpu, address + displacement));
}
/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
		       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16
		       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}
/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32)),
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Only the low 32 bits of rt are stored, so fetch them as such.  */
  aarch64_set_mem_u32 (cpu, address + displacement,
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}
/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 64),
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			     extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 16),
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}
/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive...  */
}
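/* N.B. the simulator runs a single thread and models no exclusive
   monitor, so the store-exclusive above always succeeds and always
   writes 0 (success) into the status register rs.  */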
static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  if (uresult != (uint32_t)uresult)
    flags |= C;

  if (sresult != (int32_t)sresult)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

/* N.B. these two macros expect a 'signbit' mask for the operand
   width to be in scope at the point of use.  */
#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
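/* A worked example of the 32 bit flag computation above: adding
   0x80000000 to 0x80000000 gives a result of 0, so Z is set; bit 31
   of the result is clear, so N is clear; the unsigned sum
   0x100000000 does not fit in 32 bits, so C is set; and the signed
   sum (-2^31) + (-2^31) does not fit in int32_t, so V is set.  */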
static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

/* 32 bit add immediate set flags.  */
static void
adds32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : do we need to worry about signs here?  */
  int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
  set_flags_for_add32 (cpu, value1, aimm);
}

/* 64 bit add immediate set flags.  */
static void
adds64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

/* 32 bit sub immediate.  */
static void
sub32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
}

/* 64 bit sub immediate.  */
static void
sub64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
}
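/* In the subtraction helpers above, C is a NOT-borrow flag: the three
   clauses set it exactly when value1 >= value2 as unsigned values.
   For example 5 - 3 sets C (no borrow), while 3 - 5 borrows and
   leaves C clear.  */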
/* 32 bit sub immediate set flags.  */
static void
subs32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
  uint32_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub32 (cpu, value1, value2);
}

/* 64 bit sub immediate set flags.  */
static void
subs64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
  set_flags_for_sub64 (cpu, value1, value2);
}

/* Data Processing Register.  */

/* First two helpers to perform the shift operations.  */

static inline uint32_t
shifted32 (uint32_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
	int32_t svalue = value;
	return (svalue >> count);
      }
    case ROR:
      {
	uint32_t top = value >> count;
	uint32_t bottom = value << (32 - count);
	return (bottom | top);
      }
    }
}

static inline uint64_t
shifted64 (uint64_t value, Shift shift, uint32_t count)
{
  switch (shift)
    {
    default:
    case LSL:
      return (value << count);
    case LSR:
      return (value >> count);
    case ASR:
      {
	int64_t svalue = value;
	return (svalue >> count);
      }
    case ROR:
      {
	uint64_t top = value >> count;
	uint64_t bottom = value << (64 - count);
	return (bottom | top);
      }
    }
}

/* Arithmetic shifted register.
   These allow an optional LSL, ASR or LSR to the second source
   register with a count up to the register bit count.

   N.B. register args may not be SP.  */

/* 32 bit ADD shifted register.  */
static void
add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u32 (cpu, rn, NO_SP)
		       + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				    shift, count));
}

/* 64 bit ADD shifted register.  */
static void
add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_reg_u64 (cpu, rn, NO_SP)
		       + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
				    shift, count));
}
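/* Example: shifted32 (0x80000001, ROR, 1) == 0xc0000000 -- the low
   bit rotates round into bit 31.  N.B. a ROR count of zero would
   shift by the full register width in the code above, which C leaves
   undefined; the decoder is assumed never to request that case.  */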
*/ 1933 static void 1934 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1935 { 1936 unsigned rm = INSTR (20, 16); 1937 unsigned rn = INSTR (9, 5); 1938 unsigned rd = INSTR (4, 0); 1939 1940 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 1941 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 1942 shift, count); 1943 1944 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1945 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); 1946 set_flags_for_add32 (cpu, value1, value2); 1947 } 1948 1949 /* 64 bit ADD shifted register setting flags. */ 1950 static void 1951 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1952 { 1953 unsigned rm = INSTR (20, 16); 1954 unsigned rn = INSTR (9, 5); 1955 unsigned rd = INSTR (4, 0); 1956 1957 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 1958 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 1959 shift, count); 1960 1961 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1962 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); 1963 set_flags_for_add64 (cpu, value1, value2); 1964 } 1965 1966 /* 32 bit SUB shifted register. */ 1967 static void 1968 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1969 { 1970 unsigned rm = INSTR (20, 16); 1971 unsigned rn = INSTR (9, 5); 1972 unsigned rd = INSTR (4, 0); 1973 1974 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1975 aarch64_set_reg_u64 (cpu, rd, NO_SP, 1976 aarch64_get_reg_u32 (cpu, rn, NO_SP) 1977 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 1978 shift, count)); 1979 } 1980 1981 /* 64 bit SUB shifted register. */ 1982 static void 1983 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1984 { 1985 unsigned rm = INSTR (20, 16); 1986 unsigned rn = INSTR (9, 5); 1987 unsigned rd = INSTR (4, 0); 1988 1989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 1990 aarch64_set_reg_u64 (cpu, rd, NO_SP, 1991 aarch64_get_reg_u64 (cpu, rn, NO_SP) 1992 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 1993 shift, count)); 1994 } 1995 1996 /* 32 bit SUB shifted register setting flags. */ 1997 static void 1998 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 1999 { 2000 unsigned rm = INSTR (20, 16); 2001 unsigned rn = INSTR (9, 5); 2002 unsigned rd = INSTR (4, 0); 2003 2004 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 2005 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 2006 shift, count); 2007 2008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2009 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); 2010 set_flags_for_sub32 (cpu, value1, value2); 2011 } 2012 2013 /* 64 bit SUB shifted register setting flags. */ 2014 static void 2015 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 2016 { 2017 unsigned rm = INSTR (20, 16); 2018 unsigned rn = INSTR (9, 5); 2019 unsigned rd = INSTR (4, 0); 2020 2021 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 2022 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 2023 shift, count); 2024 2025 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2026 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); 2027 set_flags_for_sub64 (cpu, value1, value2); 2028 } 2029 2030 /* First a couple more helpers to fetch the 2031 relevant source register element either 2032 sign or zero extended as required by the 2033 extension value. 
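   For example (illustrative only): with extension == SXTB and a
   source register holding 0x80, extreg32 below yields 0xffffff80,
   whereas UXTB would yield 0x00000080.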
*/
2034 
2035 static uint32_t
2036 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2037 {
2038   switch (extension)
2039     {
2040     case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2041     case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2042     case UXTW: /* Fall through.  */
2043     case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2044     case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2045     case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2046     case SXTW: /* Fall through.  */
2047     case SXTX: /* Fall through.  */
2048     default:   return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2049     }
2050 }
2051 
2052 static uint64_t
2053 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2054 {
2055   switch (extension)
2056     {
2057     case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2058     case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2059     case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2060     case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2061     case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2062     case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2063     case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2064     case SXTX:
2065     default:   return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2066     }
2067 }
2068 
2069 /* Arithmetic extending register.
2070    These allow an optional sign extension of some portion of the
2071    second source register followed by an optional left shift of
2072    between 0 and 4 bits (a shift amount of zero is allowed).
2073 
2074    N.B. output (dest) and first input arg (source) may normally be Xn
2075    or SP. However, for flag setting operations dest can only be
2076    Xn. Second input registers are always Xn.  */
2077 
2078 /* 32 bit ADD extending register.  */
2079 static void
2080 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2081 {
2082   unsigned rm = INSTR (20, 16);
2083   unsigned rn = INSTR (9, 5);
2084   unsigned rd = INSTR (4, 0);
2085 
2086   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2087   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2088                        aarch64_get_reg_u32 (cpu, rn, SP_OK)
2089                        + (extreg32 (cpu, rm, extension) << shift));
2090 }
2091 
2092 /* 64 bit ADD extending register.
2093    N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
2094 static void
2095 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2096 {
2097   unsigned rm = INSTR (20, 16);
2098   unsigned rn = INSTR (9, 5);
2099   unsigned rd = INSTR (4, 0);
2100 
2101   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2102   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2103                        aarch64_get_reg_u64 (cpu, rn, SP_OK)
2104                        + (extreg64 (cpu, rm, extension) << shift));
2105 }
2106 
2107 /* 32 bit ADD extending register setting flags.  */
2108 static void
2109 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2110 {
2111   unsigned rm = INSTR (20, 16);
2112   unsigned rn = INSTR (9, 5);
2113   unsigned rd = INSTR (4, 0);
2114 
2115   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2116   uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2117 
2118   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2119   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2120   set_flags_for_add32 (cpu, value1, value2);
2121 }
2122 
2123 /* 64 bit ADD extending register setting flags */
2124 /* N.B.
this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */ 2125 static void 2126 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2127 { 2128 unsigned rm = INSTR (20, 16); 2129 unsigned rn = INSTR (9, 5); 2130 unsigned rd = INSTR (4, 0); 2131 2132 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); 2133 uint64_t value2 = extreg64 (cpu, rm, extension) << shift; 2134 2135 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2136 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); 2137 set_flags_for_add64 (cpu, value1, value2); 2138 } 2139 2140 /* 32 bit SUB extending register. */ 2141 static void 2142 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2143 { 2144 unsigned rm = INSTR (20, 16); 2145 unsigned rn = INSTR (9, 5); 2146 unsigned rd = INSTR (4, 0); 2147 2148 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2149 aarch64_set_reg_u64 (cpu, rd, SP_OK, 2150 aarch64_get_reg_u32 (cpu, rn, SP_OK) 2151 - (extreg32 (cpu, rm, extension) << shift)); 2152 } 2153 2154 /* 64 bit SUB extending register. */ 2155 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */ 2156 static void 2157 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2158 { 2159 unsigned rm = INSTR (20, 16); 2160 unsigned rn = INSTR (9, 5); 2161 unsigned rd = INSTR (4, 0); 2162 2163 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2164 aarch64_set_reg_u64 (cpu, rd, SP_OK, 2165 aarch64_get_reg_u64 (cpu, rn, SP_OK) 2166 - (extreg64 (cpu, rm, extension) << shift)); 2167 } 2168 2169 /* 32 bit SUB extending register setting flags. */ 2170 static void 2171 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2172 { 2173 unsigned rm = INSTR (20, 16); 2174 unsigned rn = INSTR (9, 5); 2175 unsigned rd = INSTR (4, 0); 2176 2177 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK); 2178 uint32_t value2 = extreg32 (cpu, rm, extension) << shift; 2179 2180 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2181 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); 2182 set_flags_for_sub32 (cpu, value1, value2); 2183 } 2184 2185 /* 64 bit SUB extending register setting flags */ 2186 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */ 2187 static void 2188 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) 2189 { 2190 unsigned rm = INSTR (20, 16); 2191 unsigned rn = INSTR (9, 5); 2192 unsigned rd = INSTR (4, 0); 2193 2194 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); 2195 uint64_t value2 = extreg64 (cpu, rm, extension) << shift; 2196 2197 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2198 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); 2199 set_flags_for_sub64 (cpu, value1, value2); 2200 } 2201 2202 static void 2203 dexAddSubtractImmediate (sim_cpu *cpu) 2204 { 2205 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 2206 instr[30] = op : 0 ==> ADD, 1 ==> SUB 2207 instr[29] = set : 0 ==> no flags, 1 ==> set flags 2208 instr[28,24] = 10001 2209 instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC 2210 instr[21,10] = uimm12 2211 instr[9,5] = Rn 2212 instr[4,0] = Rd */ 2213 2214 /* N.B. the shift is applied at decode before calling the add/sub routine. 
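     For example (illustrative only): ADD X0, X1, #1, LSL #12 decodes
     with imm == 1 and shift == 1, so add64 below is called with
     aimm == 0x1000.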
*/
2215   uint32_t shift = INSTR (23, 22);
2216   uint32_t imm = INSTR (21, 10);
2217   uint32_t dispatch = INSTR (31, 29);
2218 
2219   NYI_assert (28, 24, 0x11);
2220 
2221   if (shift > 1)
2222     HALT_UNALLOC;
2223 
2224   if (shift)
2225     imm <<= 12;
2226 
2227   switch (dispatch)
2228     {
2229     case 0: add32 (cpu, imm); break;
2230     case 1: adds32 (cpu, imm); break;
2231     case 2: sub32 (cpu, imm); break;
2232     case 3: subs32 (cpu, imm); break;
2233     case 4: add64 (cpu, imm); break;
2234     case 5: adds64 (cpu, imm); break;
2235     case 6: sub64 (cpu, imm); break;
2236     case 7: subs64 (cpu, imm); break;
2237     }
2238 }
2239 
2240 static void
2241 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2242 {
2243   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2244      instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2245      instr[28,24] = 01011
2246      instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2247      instr[21]    = 0
2248      instr[20,16] = Rm
2249      instr[15,10] = count : must be 0xxxxx for 32 bit
2250      instr[9,5]   = Rn
2251      instr[4,0]   = Rd  */
2252 
2253   uint32_t size = INSTR (31, 31);
2254   uint32_t count = INSTR (15, 10);
2255   Shift shiftType = INSTR (23, 22);
2256 
2257   NYI_assert (28, 24, 0x0B);
2258   NYI_assert (21, 21, 0);
2259 
2260   /* Shift encoded as ROR is unallocated.  */
2261   if (shiftType == ROR)
2262     HALT_UNALLOC;
2263 
2264   /* 32 bit operations must have count[5] = 0
2265      or else we have an UNALLOC.  */
2266   if (size == 0 && uimm (count, 5, 5))
2267     HALT_UNALLOC;
2268 
2269   /* Dispatch on size:op i.e. instr[31,29].  */
2270   switch (INSTR (31, 29))
2271     {
2272     case 0: add32_shift (cpu, shiftType, count); break;
2273     case 1: adds32_shift (cpu, shiftType, count); break;
2274     case 2: sub32_shift (cpu, shiftType, count); break;
2275     case 3: subs32_shift (cpu, shiftType, count); break;
2276     case 4: add64_shift (cpu, shiftType, count); break;
2277     case 5: adds64_shift (cpu, shiftType, count); break;
2278     case 6: sub64_shift (cpu, shiftType, count); break;
2279     case 7: subs64_shift (cpu, shiftType, count); break;
2280     }
2281 }
2282 
2283 static void
2284 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2285 {
2286   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2287      instr[30]    = op : 0 ==> ADD, 1 ==> SUB
2288      instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
2289      instr[28,24] = 01011
2290      instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2291      instr[21]    = 1
2292      instr[20,16] = Rm
2293      instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2294                              010 ==> UXTW, 011 ==> UXTX|LSL,
2295                              100 ==> SXTB, 101 ==> SXTH,
2296                              110 ==> SXTW, 111 ==> SXTX,
2297      instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2298      instr[9,5]   = Rn
2299      instr[4,0]   = Rd  */
2300 
2301   Extension extensionType = INSTR (15, 13);
2302   uint32_t shift = INSTR (12, 10);
2303 
2304   NYI_assert (28, 24, 0x0B);
2305   NYI_assert (21, 21, 1);
2306 
2307   /* Shift may not exceed 4.  */
2308   if (shift > 4)
2309     HALT_UNALLOC;
2310 
2311   /* Dispatch on size:op:set?.
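      For example (illustrative only): SUBS X0, X1, W2, UXTB #2 has
      instr[31,29] == 7 (0b111) and dispatches to subs64_ext below.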
*/ 2312 switch (INSTR (31, 29)) 2313 { 2314 case 0: add32_ext (cpu, extensionType, shift); break; 2315 case 1: adds32_ext (cpu, extensionType, shift); break; 2316 case 2: sub32_ext (cpu, extensionType, shift); break; 2317 case 3: subs32_ext (cpu, extensionType, shift); break; 2318 case 4: add64_ext (cpu, extensionType, shift); break; 2319 case 5: adds64_ext (cpu, extensionType, shift); break; 2320 case 6: sub64_ext (cpu, extensionType, shift); break; 2321 case 7: subs64_ext (cpu, extensionType, shift); break; 2322 } 2323 } 2324 2325 /* Conditional data processing 2326 Condition register is implicit 3rd source. */ 2327 2328 /* 32 bit add with carry. */ 2329 /* N.B register args may not be SP. */ 2330 2331 static void 2332 adc32 (sim_cpu *cpu) 2333 { 2334 unsigned rm = INSTR (20, 16); 2335 unsigned rn = INSTR (9, 5); 2336 unsigned rd = INSTR (4, 0); 2337 2338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2339 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2340 aarch64_get_reg_u32 (cpu, rn, NO_SP) 2341 + aarch64_get_reg_u32 (cpu, rm, NO_SP) 2342 + IS_SET (C)); 2343 } 2344 2345 /* 64 bit add with carry */ 2346 static void 2347 adc64 (sim_cpu *cpu) 2348 { 2349 unsigned rm = INSTR (20, 16); 2350 unsigned rn = INSTR (9, 5); 2351 unsigned rd = INSTR (4, 0); 2352 2353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2354 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2355 aarch64_get_reg_u64 (cpu, rn, NO_SP) 2356 + aarch64_get_reg_u64 (cpu, rm, NO_SP) 2357 + IS_SET (C)); 2358 } 2359 2360 /* 32 bit add with carry setting flags. */ 2361 static void 2362 adcs32 (sim_cpu *cpu) 2363 { 2364 unsigned rm = INSTR (20, 16); 2365 unsigned rn = INSTR (9, 5); 2366 unsigned rd = INSTR (4, 0); 2367 2368 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 2369 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); 2370 uint32_t carry = IS_SET (C); 2371 2372 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2373 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); 2374 set_flags_for_add32 (cpu, value1, value2 + carry); 2375 } 2376 2377 /* 64 bit add with carry setting flags. */ 2378 static void 2379 adcs64 (sim_cpu *cpu) 2380 { 2381 unsigned rm = INSTR (20, 16); 2382 unsigned rn = INSTR (9, 5); 2383 unsigned rd = INSTR (4, 0); 2384 2385 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 2386 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); 2387 uint64_t carry = IS_SET (C); 2388 2389 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2390 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); 2391 set_flags_for_add64 (cpu, value1, value2 + carry); 2392 } 2393 2394 /* 32 bit sub with carry. */ 2395 static void 2396 sbc32 (sim_cpu *cpu) 2397 { 2398 unsigned rm = INSTR (20, 16); 2399 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. 
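                                   NGC Rd, Rm is SBC Rd, XZR, Rm.  The
                                   carry flag acts as NOT borrow, hence
                                   the "- 1 + IS_SET (C)" below.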
*/
2400   unsigned rd = INSTR (4, 0);
2401 
2402   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2403   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2404                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
2405                        - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2406                        - 1 + IS_SET (C));
2407 }
2408 
2409 /* 64 bit sub with carry.  */
2410 static void
2411 sbc64 (sim_cpu *cpu)
2412 {
2413   unsigned rm = INSTR (20, 16);
2414   unsigned rn = INSTR (9, 5);
2415   unsigned rd = INSTR (4, 0);
2416 
2417   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2418   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2419                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
2420                        - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2421                        - 1 + IS_SET (C));
2422 }
2423 
2424 /* 32 bit sub with carry setting flags.  */
2425 static void
2426 sbcs32 (sim_cpu *cpu)
2427 {
2428   unsigned rm = INSTR (20, 16);
2429   unsigned rn = INSTR (9, 5);
2430   unsigned rd = INSTR (4, 0);
2431 
2432   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2433   uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2434   uint32_t carry = IS_SET (C);
2435   uint32_t result = value1 - value2 - 1 + carry;
2436 
2437   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2438   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2439   set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2440 }
2441 
2442 /* 64 bit sub with carry setting flags.  */
2443 static void
2444 sbcs64 (sim_cpu *cpu)
2445 {
2446   unsigned rm = INSTR (20, 16);
2447   unsigned rn = INSTR (9, 5);
2448   unsigned rd = INSTR (4, 0);
2449 
2450   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2451   uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2452   uint64_t carry = IS_SET (C);
2453   uint64_t result = value1 - value2 - 1 + carry;
2454 
2455   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2456   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2457   set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2458 }
2459 
2460 static void
2461 dexAddSubtractWithCarry (sim_cpu *cpu)
2462 {
2463   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2464      instr[30]    = op : 0 ==> ADC, 1 ==> SBC
2465      instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
2466      instr[28,21] = 1 1010 000
2467      instr[20,16] = Rm
2468      instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2469      instr[9,5]   = Rn
2470      instr[4,0]   = Rd  */
2471 
2472   uint32_t op2 = INSTR (15, 10);
2473 
2474   NYI_assert (28, 21, 0xD0);
2475 
2476   if (op2 != 0)
2477     HALT_UNALLOC;
2478 
2479   /* Dispatch on size:op:set?.  */
2480   switch (INSTR (31, 29))
2481     {
2482     case 0: adc32 (cpu); break;
2483     case 1: adcs32 (cpu); break;
2484     case 2: sbc32 (cpu); break;
2485     case 3: sbcs32 (cpu); break;
2486     case 4: adc64 (cpu); break;
2487     case 5: adcs64 (cpu); break;
2488     case 6: sbc64 (cpu); break;
2489     case 7: sbcs64 (cpu); break;
2490     }
2491 }
2492 
2493 static uint32_t
2494 testConditionCode (sim_cpu *cpu, CondCode cc)
2495 {
2496   /* This should be reducible to branchless logic
2497      by some careful testing of bits in CC followed
2498      by the requisite masking and combining of bits
2499      from the flag register.
2500 
2501      For now we do it with a switch.
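     One branchless shape (an untested sketch, not used here) follows
     the ARM ARM ConditionHolds pseudocode: compute a base predicate
     from cc[3:1] -- Z, C, N, V, C && !Z, N == V, !Z && (N == V),
     TRUE -- then XOR it with cc[0], except that 1111 (NV) behaves
     like AL rather than as the inversion of 1110.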
*/ 2502 int res; 2503 2504 switch (cc) 2505 { 2506 case EQ: res = IS_SET (Z); break; 2507 case NE: res = IS_CLEAR (Z); break; 2508 case CS: res = IS_SET (C); break; 2509 case CC: res = IS_CLEAR (C); break; 2510 case MI: res = IS_SET (N); break; 2511 case PL: res = IS_CLEAR (N); break; 2512 case VS: res = IS_SET (V); break; 2513 case VC: res = IS_CLEAR (V); break; 2514 case HI: res = IS_SET (C) && IS_CLEAR (Z); break; 2515 case LS: res = IS_CLEAR (C) || IS_SET (Z); break; 2516 case GE: res = IS_SET (N) == IS_SET (V); break; 2517 case LT: res = IS_SET (N) != IS_SET (V); break; 2518 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break; 2519 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break; 2520 case AL: 2521 case NV: 2522 default: 2523 res = 1; 2524 break; 2525 } 2526 return res; 2527 } 2528 2529 static void 2530 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */ 2531 { 2532 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 2533 instr[30] = compare with positive (1) or negative value (0) 2534 instr[29,21] = 1 1101 0010 2535 instr[20,16] = Rm or const 2536 instr[15,12] = cond 2537 instr[11] = compare reg (0) or const (1) 2538 instr[10] = 0 2539 instr[9,5] = Rn 2540 instr[4] = 0 2541 instr[3,0] = value for CPSR bits if the comparison does not take place. */ 2542 signed int negate; 2543 unsigned rm; 2544 unsigned rn; 2545 2546 NYI_assert (29, 21, 0x1d2); 2547 NYI_assert (10, 10, 0); 2548 NYI_assert (4, 4, 0); 2549 2550 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2551 if (! testConditionCode (cpu, INSTR (15, 12))) 2552 { 2553 aarch64_set_CPSR (cpu, INSTR (3, 0)); 2554 return; 2555 } 2556 2557 negate = INSTR (30, 30) ? 1 : -1; 2558 rm = INSTR (20, 16); 2559 rn = INSTR ( 9, 5); 2560 2561 if (INSTR (31, 31)) 2562 { 2563 if (INSTR (11, 11)) 2564 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), 2565 negate * (uint64_t) rm); 2566 else 2567 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), 2568 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK)); 2569 } 2570 else 2571 { 2572 if (INSTR (11, 11)) 2573 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), 2574 negate * rm); 2575 else 2576 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), 2577 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK)); 2578 } 2579 } 2580 2581 static void 2582 do_vec_MOV_whole_vector (sim_cpu *cpu) 2583 { 2584 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm) 2585 2586 instr[31] = 0 2587 instr[30] = half(0)/full(1) 2588 instr[29,21] = 001110101 2589 instr[20,16] = Vs 2590 instr[15,10] = 000111 2591 instr[9,5] = Vs 2592 instr[4,0] = Vd */ 2593 2594 unsigned vs = INSTR (9, 5); 2595 unsigned vd = INSTR (4, 0); 2596 2597 NYI_assert (29, 21, 0x075); 2598 NYI_assert (15, 10, 0x07); 2599 2600 if (INSTR (20, 16) != vs) 2601 HALT_NYI; 2602 2603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2604 if (INSTR (30, 30)) 2605 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1)); 2606 2607 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0)); 2608 } 2609 2610 static void 2611 do_vec_SMOV_into_scalar (sim_cpu *cpu) 2612 { 2613 /* instr[31] = 0 2614 instr[30] = word(0)/long(1) 2615 instr[29,21] = 00 1110 000 2616 instr[20,16] = element size and index 2617 instr[15,10] = 00 0010 11 2618 instr[9,5] = V source 2619 instr[4,0] = R dest */ 2620 2621 unsigned vs = INSTR (9, 5); 2622 unsigned rd = INSTR (4, 0); 2623 unsigned imm5 = INSTR (20, 16); 2624 unsigned full = INSTR (30, 30); 2625 int size, index; 2626 2627 NYI_assert (29, 
21, 0x070); 2628 NYI_assert (15, 10, 0x0B); 2629 2630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2631 2632 if (imm5 & 0x1) 2633 { 2634 size = 0; 2635 index = (imm5 >> 1) & 0xF; 2636 } 2637 else if (imm5 & 0x2) 2638 { 2639 size = 1; 2640 index = (imm5 >> 2) & 0x7; 2641 } 2642 else if (full && (imm5 & 0x4)) 2643 { 2644 size = 2; 2645 index = (imm5 >> 3) & 0x3; 2646 } 2647 else 2648 HALT_UNALLOC; 2649 2650 switch (size) 2651 { 2652 case 0: 2653 if (full) 2654 aarch64_set_reg_s64 (cpu, rd, NO_SP, 2655 aarch64_get_vec_s8 (cpu, vs, index)); 2656 else 2657 aarch64_set_reg_s32 (cpu, rd, NO_SP, 2658 aarch64_get_vec_s8 (cpu, vs, index)); 2659 break; 2660 2661 case 1: 2662 if (full) 2663 aarch64_set_reg_s64 (cpu, rd, NO_SP, 2664 aarch64_get_vec_s16 (cpu, vs, index)); 2665 else 2666 aarch64_set_reg_s32 (cpu, rd, NO_SP, 2667 aarch64_get_vec_s16 (cpu, vs, index)); 2668 break; 2669 2670 case 2: 2671 aarch64_set_reg_s64 (cpu, rd, NO_SP, 2672 aarch64_get_vec_s32 (cpu, vs, index)); 2673 break; 2674 2675 default: 2676 HALT_UNALLOC; 2677 } 2678 } 2679 2680 static void 2681 do_vec_UMOV_into_scalar (sim_cpu *cpu) 2682 { 2683 /* instr[31] = 0 2684 instr[30] = word(0)/long(1) 2685 instr[29,21] = 00 1110 000 2686 instr[20,16] = element size and index 2687 instr[15,10] = 00 0011 11 2688 instr[9,5] = V source 2689 instr[4,0] = R dest */ 2690 2691 unsigned vs = INSTR (9, 5); 2692 unsigned rd = INSTR (4, 0); 2693 unsigned imm5 = INSTR (20, 16); 2694 unsigned full = INSTR (30, 30); 2695 int size, index; 2696 2697 NYI_assert (29, 21, 0x070); 2698 NYI_assert (15, 10, 0x0F); 2699 2700 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2701 2702 if (!full) 2703 { 2704 if (imm5 & 0x1) 2705 { 2706 size = 0; 2707 index = (imm5 >> 1) & 0xF; 2708 } 2709 else if (imm5 & 0x2) 2710 { 2711 size = 1; 2712 index = (imm5 >> 2) & 0x7; 2713 } 2714 else if (imm5 & 0x4) 2715 { 2716 size = 2; 2717 index = (imm5 >> 3) & 0x3; 2718 } 2719 else 2720 HALT_UNALLOC; 2721 } 2722 else if (imm5 & 0x8) 2723 { 2724 size = 3; 2725 index = (imm5 >> 4) & 0x1; 2726 } 2727 else 2728 HALT_UNALLOC; 2729 2730 switch (size) 2731 { 2732 case 0: 2733 aarch64_set_reg_u32 (cpu, rd, NO_SP, 2734 aarch64_get_vec_u8 (cpu, vs, index)); 2735 break; 2736 2737 case 1: 2738 aarch64_set_reg_u32 (cpu, rd, NO_SP, 2739 aarch64_get_vec_u16 (cpu, vs, index)); 2740 break; 2741 2742 case 2: 2743 aarch64_set_reg_u32 (cpu, rd, NO_SP, 2744 aarch64_get_vec_u32 (cpu, vs, index)); 2745 break; 2746 2747 case 3: 2748 aarch64_set_reg_u64 (cpu, rd, NO_SP, 2749 aarch64_get_vec_u64 (cpu, vs, index)); 2750 break; 2751 2752 default: 2753 HALT_UNALLOC; 2754 } 2755 } 2756 2757 static void 2758 do_vec_INS (sim_cpu *cpu) 2759 { 2760 /* instr[31,21] = 01001110000 2761 instr[20,16] = element size and index 2762 instr[15,10] = 000111 2763 instr[9,5] = W source 2764 instr[4,0] = V dest */ 2765 2766 int index; 2767 unsigned rs = INSTR (9, 5); 2768 unsigned vd = INSTR (4, 0); 2769 2770 NYI_assert (31, 21, 0x270); 2771 NYI_assert (15, 10, 0x07); 2772 2773 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2774 if (INSTR (16, 16)) 2775 { 2776 index = INSTR (20, 17); 2777 aarch64_set_vec_u8 (cpu, vd, index, 2778 aarch64_get_reg_u8 (cpu, rs, NO_SP)); 2779 } 2780 else if (INSTR (17, 17)) 2781 { 2782 index = INSTR (20, 18); 2783 aarch64_set_vec_u16 (cpu, vd, index, 2784 aarch64_get_reg_u16 (cpu, rs, NO_SP)); 2785 } 2786 else if (INSTR (18, 18)) 2787 { 2788 index = INSTR (20, 19); 2789 aarch64_set_vec_u32 (cpu, vd, index, 2790 aarch64_get_reg_u32 (cpu, rs, NO_SP)); 2791 } 2792 else if (INSTR (19, 
19)) 2793 { 2794 index = INSTR (20, 20); 2795 aarch64_set_vec_u64 (cpu, vd, index, 2796 aarch64_get_reg_u64 (cpu, rs, NO_SP)); 2797 } 2798 else 2799 HALT_NYI; 2800 } 2801 2802 static void 2803 do_vec_DUP_vector_into_vector (sim_cpu *cpu) 2804 { 2805 /* instr[31] = 0 2806 instr[30] = half(0)/full(1) 2807 instr[29,21] = 00 1110 000 2808 instr[20,16] = element size and index 2809 instr[15,10] = 0000 01 2810 instr[9,5] = V source 2811 instr[4,0] = V dest. */ 2812 2813 unsigned full = INSTR (30, 30); 2814 unsigned vs = INSTR (9, 5); 2815 unsigned vd = INSTR (4, 0); 2816 int i, index; 2817 2818 NYI_assert (29, 21, 0x070); 2819 NYI_assert (15, 10, 0x01); 2820 2821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2822 if (INSTR (16, 16)) 2823 { 2824 index = INSTR (20, 17); 2825 2826 for (i = 0; i < (full ? 16 : 8); i++) 2827 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index)); 2828 } 2829 else if (INSTR (17, 17)) 2830 { 2831 index = INSTR (20, 18); 2832 2833 for (i = 0; i < (full ? 8 : 4); i++) 2834 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index)); 2835 } 2836 else if (INSTR (18, 18)) 2837 { 2838 index = INSTR (20, 19); 2839 2840 for (i = 0; i < (full ? 4 : 2); i++) 2841 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index)); 2842 } 2843 else 2844 { 2845 if (INSTR (19, 19) == 0) 2846 HALT_UNALLOC; 2847 2848 if (! full) 2849 HALT_UNALLOC; 2850 2851 index = INSTR (20, 20); 2852 2853 for (i = 0; i < 2; i++) 2854 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index)); 2855 } 2856 } 2857 2858 static void 2859 do_vec_TBL (sim_cpu *cpu) 2860 { 2861 /* instr[31] = 0 2862 instr[30] = half(0)/full(1) 2863 instr[29,21] = 00 1110 000 2864 instr[20,16] = Vm 2865 instr[15] = 0 2866 instr[14,13] = vec length 2867 instr[12,10] = 000 2868 instr[9,5] = V start 2869 instr[4,0] = V dest */ 2870 2871 int full = INSTR (30, 30); 2872 int len = INSTR (14, 13) + 1; 2873 unsigned vm = INSTR (20, 16); 2874 unsigned vn = INSTR (9, 5); 2875 unsigned vd = INSTR (4, 0); 2876 unsigned i; 2877 2878 NYI_assert (29, 21, 0x070); 2879 NYI_assert (12, 10, 0); 2880 2881 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2882 for (i = 0; i < (full ? 16 : 8); i++) 2883 { 2884 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i); 2885 uint8_t val; 2886 2887 if (selector < 16) 2888 val = aarch64_get_vec_u8 (cpu, vn, selector); 2889 else if (selector < 32) 2890 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16); 2891 else if (selector < 48) 2892 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32); 2893 else if (selector < 64) 2894 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48); 2895 else 2896 val = 0; 2897 2898 aarch64_set_vec_u8 (cpu, vd, i, val); 2899 } 2900 } 2901 2902 static void 2903 do_vec_TRN (sim_cpu *cpu) 2904 { 2905 /* instr[31] = 0 2906 instr[30] = half(0)/full(1) 2907 instr[29,24] = 00 1110 2908 instr[23,22] = size 2909 instr[21] = 0 2910 instr[20,16] = Vm 2911 instr[15] = 0 2912 instr[14] = TRN1 (0) / TRN2 (1) 2913 instr[13,10] = 1010 2914 instr[9,5] = V source 2915 instr[4,0] = V dest. */ 2916 2917 int full = INSTR (30, 30); 2918 int second = INSTR (14, 14); 2919 unsigned vm = INSTR (20, 16); 2920 unsigned vn = INSTR (9, 5); 2921 unsigned vd = INSTR (4, 0); 2922 unsigned i; 2923 2924 NYI_assert (29, 24, 0x0E); 2925 NYI_assert (13, 10, 0xA); 2926 2927 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 2928 switch (INSTR (23, 22)) 2929 { 2930 case 0: 2931 for (i = 0; i < (full ? 
8 : 4); i++)
2932         {
2933           aarch64_set_vec_u8
2934             (cpu, vd, i * 2,
2935              aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2936           aarch64_set_vec_u8
2937             (cpu, vd, i * 2 + 1,
2938              aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2939         }
2940       break;
2941 
2942     case 1:
2943       for (i = 0; i < (full ? 4 : 2); i++)
2944         {
2945           aarch64_set_vec_u16
2946             (cpu, vd, i * 2,
2947              aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2948           aarch64_set_vec_u16
2949             (cpu, vd, i * 2 + 1,
2950              aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2951         }
2952       break;
2953 
2954     case 2:
2955       aarch64_set_vec_u32
2956         (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2957       aarch64_set_vec_u32
2958         (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2959       aarch64_set_vec_u32
2960         (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2961       aarch64_set_vec_u32
2962         (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2963       break;
2964 
2965     case 3:
2966       if (! full)
2967         HALT_UNALLOC;
2968 
2969       aarch64_set_vec_u64 (cpu, vd, 0,
2970                            aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2971       aarch64_set_vec_u64 (cpu, vd, 1,
2972                            aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2973       break;
2974     }
2975 }
2976 
2977 static void
2978 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2979 {
2980   /* instr[31]    = 0
2981      instr[30]    = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2982                     [must be 1 for 64-bit xfer]
2983      instr[29,20] = 00 1110 0000
2984      instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2985                     0100=> 32-bits, 1000=> 64-bits
2986      instr[15,10] = 0000 11
2987      instr[9,5]   = W source
2988      instr[4,0]   = V dest.  */
2989 
2990   unsigned i;
2991   unsigned Vd = INSTR (4, 0);
2992   unsigned Rs = INSTR (9, 5);
2993   int both = INSTR (30, 30);
2994 
2995   NYI_assert (29, 20, 0x0E0);
2996   NYI_assert (15, 10, 0x03);
2997 
2998   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2999   switch (INSTR (19, 16))
3000     {
3001     case 1:
3002       for (i = 0; i < (both ? 16 : 8); i++)
3003         aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3004       break;
3005 
3006     case 2:
3007       for (i = 0; i < (both ? 8 : 4); i++)
3008         aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3009       break;
3010 
3011     case 4:
3012       for (i = 0; i < (both ? 4 : 2); i++)
3013         aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3014       break;
3015 
3016     case 8:
3017       if (!both)
3018         HALT_NYI;
3019       aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3020       aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3021       break;
3022 
3023     default:
3024       HALT_NYI;
3025     }
3026 }
3027 
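/* Illustration (informal, not part of the original source): for
   UZP1 Vd.16B, Vn.16B, Vm.16B with Vn holding bytes 0..15 and Vm
   holding bytes 16..31, the even-numbered bytes of the Vm:Vn pair
   are kept, giving Vd = { 0, 2, 4, ..., 30 }; UZP2 keeps the
   odd-numbered bytes instead.  */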
3028 static void
3029 do_vec_UZP (sim_cpu *cpu)
3030 {
3031   /* instr[31]    = 0
3032      instr[30]    = half(0)/full(1)
3033      instr[29,24] = 00 1110
3034      instr[23,22] = size: byte(00), half(01), word (10), long (11)
3035      instr[21]    = 0
3036      instr[20,16] = Vm
3037      instr[15]    = 0
3038      instr[14]    = lower (0) / upper (1)
3039      instr[13,10] = 0110
3040      instr[9,5]   = Vn
3041      instr[4,0]   = Vd.  */
3042 
3043   int full = INSTR (30, 30);
3044   int upper = INSTR (14, 14);
3045 
3046   unsigned vm = INSTR (20, 16);
3047   unsigned vn = INSTR (9, 5);
3048   unsigned vd = INSTR (4, 0);
3049 
3050   uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3051   uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3052   uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3053   uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3054 
3055   uint64_t val1;
3056   uint64_t val2;
3057 
3058   uint64_t input2 = full ? val_n2 : val_m1;
3059 
3060   NYI_assert (29, 24, 0x0E);
3061   NYI_assert (21, 21, 0);
3062   NYI_assert (15, 15, 0);
3063   NYI_assert (13, 10, 6);
3064 
3065   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3066   switch (INSTR (23, 22))
3067     {
3068     case 0:
3069       val1  = (val_n1 >> (upper * 8)) & 0xFFULL;
3070       val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3071       val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3072       val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3073 
3074       val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3075       val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3076       val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3077       val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3078 
3079       if (full)
3080         {
3081           val2  = (val_m1 >> (upper * 8)) & 0xFFULL;
3082           val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3083           val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3084           val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3085 
3086           val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3087           val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3088           val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3089           val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3090         }
3091       break;
3092 
3093     case 1:
3094       val1  = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3095       val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3096 
3097       val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3098       val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3099 
3100       if (full)
3101         {
3102           val2  = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3103           val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3104 
3105           val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3106           val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3107         }
3108       break;
3109 
3110     case 2:
3111       val1  = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3112       val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3113 
3114       if (full)
3115         {
3116           val2  = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3117           val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3118         }
3119       break;
3120 
3121     case 3:
3122       if (! full)
3123         HALT_UNALLOC;
3124 
3125       val1 = upper ? val_n2 : val_n1;
3126       val2 = upper ? val_m2 : val_m1;
3127       break;
3128     }
3129 
3130   aarch64_set_vec_u64 (cpu, vd, 0, val1);
3131   if (full)
3132     aarch64_set_vec_u64 (cpu, vd, 1, val2);
3133 }
3134 
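/* Illustration (informal, not part of the original source): ZIP1
   Vd.8B, Vn.8B, Vm.8B interleaves the two low halves element by
   element, so Vn = { a0, a1, a2, ... } and Vm = { b0, b1, b2, ... }
   give Vd = { a0, b0, a1, b1, ... }; ZIP2 does the same with the
   two high halves.  */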
3135 static void
3136 do_vec_ZIP (sim_cpu *cpu)
3137 {
3138   /* instr[31]    = 0
3139      instr[30]    = half(0)/full(1)
3140      instr[29,24] = 00 1110
3141      instr[23,22] = size: byte(00), half(01), word (10), long (11)
3142      instr[21]    = 0
3143      instr[20,16] = Vm
3144      instr[15]    = 0
3145      instr[14]    = lower (0) / upper (1)
3146      instr[13,10] = 1110
3147      instr[9,5]   = Vn
3148      instr[4,0]   = Vd.  */
3149 
3150   int full = INSTR (30, 30);
3151   int upper = INSTR (14, 14);
3152 
3153   unsigned vm = INSTR (20, 16);
3154   unsigned vn = INSTR (9, 5);
3155   unsigned vd = INSTR (4, 0);
3156 
3157   uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3158   uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3159   uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3160   uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3161 
3162   uint64_t val1 = 0;
3163   uint64_t val2 = 0;
3164 
3165   uint64_t input1 = upper ? val_n2 : val_n1;
3166   uint64_t input2 = upper ? val_m2 : val_m1;
3167 
3168   NYI_assert (29, 24, 0x0E);
3169   NYI_assert (21, 21, 0);
3170   NYI_assert (15, 15, 0);
3171   NYI_assert (13, 10, 0xE);
3172 
3173   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3174   switch (INSTR (23, 22))
3175     {
3176     case 0:
3177       val1 =
3178           ((input1 <<  0) & (0xFF    <<  0))
3179         | ((input2 <<  8) & (0xFF    <<  8))
3180         | ((input1 <<  8) & (0xFF    << 16))
3181         | ((input2 << 16) & (0xFF    << 24))
3182         | ((input1 << 16) & (0xFFULL << 32))
3183         | ((input2 << 24) & (0xFFULL << 40))
3184         | ((input1 << 24) & (0xFFULL << 48))
3185         | ((input2 << 32) & (0xFFULL << 56));
3186 
3187       val2 =
3188           ((input1 >> 32) & (0xFF    <<  0))
3189         | ((input2 >> 24) & (0xFF    <<  8))
3190         | ((input1 >> 24) & (0xFF    << 16))
3191         | ((input2 >> 16) & (0xFF    << 24))
3192         | ((input1 >> 16) & (0xFFULL << 32))
3193         | ((input2 >>  8) & (0xFFULL << 40))
3194         | ((input1 >>  8) & (0xFFULL << 48))
3195         | ((input2 >>  0) & (0xFFULL << 56));
3196       break;
3197 
3198     case 1:
3199       val1 =
3200           ((input1 <<  0) & (0xFFFF    <<  0))
3201         | ((input2 << 16) & (0xFFFF    << 16))
3202         | ((input1 << 16) & (0xFFFFULL << 32))
3203         | ((input2 << 32) & (0xFFFFULL << 48));
3204 
3205       val2 =
3206           ((input1 >> 32) & (0xFFFF    <<  0))
3207         | ((input2 >> 16) & (0xFFFF    << 16))
3208         | ((input1 >> 16) & (0xFFFFULL << 32))
3209         | ((input2 >>  0) & (0xFFFFULL << 48));
3210       break;
3211 
3212     case 2:
3213       val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3214       val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3215       break;
3216 
3217     case 3:
3218       val1 = input1;
3219       val2 = input2;
3220       break;
3221     }
3222 
3223   aarch64_set_vec_u64 (cpu, vd, 0, val1);
3224   if (full)
3225     aarch64_set_vec_u64 (cpu, vd, 1, val2);
3226 }
3227 
3228 /* Floating point immediates are encoded in 8 bits.
3229    fpimm[7] = sign bit.
3230    fpimm[6:4] = signed exponent.
3231    fpimm[3:0] = fraction (assuming leading 1).
3232    i.e. F = s * 1.f * 2^(e - b).  */
3233 
3234 static float
3235 fp_immediate_for_encoding_32 (uint32_t imm8)
3236 {
3237   float u;
3238   uint32_t s, e, f, i;
3239 
3240   s = (imm8 >> 7) & 0x1;
3241   e = (imm8 >> 4) & 0x7;
3242   f = imm8 & 0xf;
3243 
3244   /* The fp value is s * n/16 * 2^r where n is 16+f and r is
3245      the signed exponent derived from e below.  */
3246   u = (16.0 + f) / 16.0;
3247 
3248   /* N.B. exponent is signed.  */
3249   if (e < 4)
3250     {
3251       int epos = e;
3252 
3253       for (i = 0; i <= epos; i++)
3254         u *= 2.0;
3255     }
3256   else
3257     {
3258       int eneg = 7 - e;
3259 
3260       for (i = 0; i < eneg; i++)
3261         u /= 2.0;
3262     }
3263 
3264   if (s)
3265     u = - u;
3266 
3267   return u;
3268 }
3269 
3270 static double
3271 fp_immediate_for_encoding_64 (uint32_t imm8)
3272 {
3273   double u;
3274   uint32_t s, e, f, i;
3275 
3276   s = (imm8 >> 7) & 0x1;
3277   e = (imm8 >> 4) & 0x7;
3278   f = imm8 & 0xf;
3279 
3280   /* The fp value is s * n/16 * 2^r where n is 16+f and r is
3281      the signed exponent derived from e below.  */
3281   u = (16.0 + f) / 16.0;
3282 
3282   /* N.B. exponent is signed.
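     For example (informal check): imm8 == 0x70 has s == 0, e == 7 and
     f == 0, which the code below maps to 1.0 (the encoding used by
     FMOV #1.0), while imm8 == 0x10 (s == 0, e == 1, f == 0) maps
     to 4.0.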
*/ 3283 if (e < 4) 3284 { 3285 int epos = e; 3286 3287 for (i = 0; i <= epos; i++) 3288 u *= 2.0; 3289 } 3290 else 3291 { 3292 int eneg = 7 - e; 3293 3294 for (i = 0; i < eneg; i++) 3295 u /= 2.0; 3296 } 3297 3298 if (s) 3299 u = - u; 3300 3301 return u; 3302 } 3303 3304 static void 3305 do_vec_MOV_immediate (sim_cpu *cpu) 3306 { 3307 /* instr[31] = 0 3308 instr[30] = full/half selector 3309 instr[29,19] = 00111100000 3310 instr[18,16] = high 3 bits of uimm8 3311 instr[15,12] = size & shift: 3312 0000 => 32-bit 3313 0010 => 32-bit + LSL#8 3314 0100 => 32-bit + LSL#16 3315 0110 => 32-bit + LSL#24 3316 1010 => 16-bit + LSL#8 3317 1000 => 16-bit 3318 1101 => 32-bit + MSL#16 3319 1100 => 32-bit + MSL#8 3320 1110 => 8-bit 3321 1111 => double 3322 instr[11,10] = 01 3323 instr[9,5] = low 5-bits of uimm8 3324 instr[4,0] = Vd. */ 3325 3326 int full = INSTR (30, 30); 3327 unsigned vd = INSTR (4, 0); 3328 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5); 3329 unsigned i; 3330 3331 NYI_assert (29, 19, 0x1E0); 3332 NYI_assert (11, 10, 1); 3333 3334 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3335 switch (INSTR (15, 12)) 3336 { 3337 case 0x0: /* 32-bit, no shift. */ 3338 case 0x2: /* 32-bit, shift by 8. */ 3339 case 0x4: /* 32-bit, shift by 16. */ 3340 case 0x6: /* 32-bit, shift by 24. */ 3341 val <<= (8 * INSTR (14, 13)); 3342 for (i = 0; i < (full ? 4 : 2); i++) 3343 aarch64_set_vec_u32 (cpu, vd, i, val); 3344 break; 3345 3346 case 0xa: /* 16-bit, shift by 8. */ 3347 val <<= 8; 3348 /* Fall through. */ 3349 case 0x8: /* 16-bit, no shift. */ 3350 for (i = 0; i < (full ? 8 : 4); i++) 3351 aarch64_set_vec_u16 (cpu, vd, i, val); 3352 break; 3353 3354 case 0xd: /* 32-bit, mask shift by 16. */ 3355 val <<= 8; 3356 val |= 0xFF; 3357 /* Fall through. */ 3358 case 0xc: /* 32-bit, mask shift by 8. */ 3359 val <<= 8; 3360 val |= 0xFF; 3361 for (i = 0; i < (full ? 4 : 2); i++) 3362 aarch64_set_vec_u32 (cpu, vd, i, val); 3363 break; 3364 3365 case 0xe: /* 8-bit, no shift. */ 3366 for (i = 0; i < (full ? 16 : 8); i++) 3367 aarch64_set_vec_u8 (cpu, vd, i, val); 3368 break; 3369 3370 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */ 3371 { 3372 float u = fp_immediate_for_encoding_32 (val); 3373 for (i = 0; i < (full ? 4 : 2); i++) 3374 aarch64_set_vec_float (cpu, vd, i, u); 3375 break; 3376 } 3377 3378 default: 3379 HALT_NYI; 3380 } 3381 } 3382 3383 static void 3384 do_vec_MVNI (sim_cpu *cpu) 3385 { 3386 /* instr[31] = 0 3387 instr[30] = full/half selector 3388 instr[29,19] = 10111100000 3389 instr[18,16] = high 3 bits of uimm8 3390 instr[15,12] = selector 3391 instr[11,10] = 01 3392 instr[9,5] = low 5-bits of uimm8 3393 instr[4,0] = Vd. */ 3394 3395 int full = INSTR (30, 30); 3396 unsigned vd = INSTR (4, 0); 3397 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5); 3398 unsigned i; 3399 3400 NYI_assert (29, 19, 0x5E0); 3401 NYI_assert (11, 10, 1); 3402 3403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3404 switch (INSTR (15, 12)) 3405 { 3406 case 0x0: /* 32-bit, no shift. */ 3407 case 0x2: /* 32-bit, shift by 8. */ 3408 case 0x4: /* 32-bit, shift by 16. */ 3409 case 0x6: /* 32-bit, shift by 24. */ 3410 val <<= (8 * INSTR (14, 13)); 3411 val = ~ val; 3412 for (i = 0; i < (full ? 4 : 2); i++) 3413 aarch64_set_vec_u32 (cpu, vd, i, val); 3414 return; 3415 3416 case 0xa: /* 16-bit, 8 bit shift. */ 3417 val <<= 8; 3418 case 0x8: /* 16-bit, no shift. */ 3419 val = ~ val; 3420 for (i = 0; i < (full ? 
8 : 4); i++)
3421         aarch64_set_vec_u16 (cpu, vd, i, val);
3422       return;
3423 
3424     case 0xd: /* 32-bit, mask shift by 16.  */
3425       val <<= 8;
3426       val |= 0xFF;  /* Fall through.  */
3427     case 0xc: /* 32-bit, mask shift by 8.  */
3428       val <<= 8;
3429       val |= 0xFF;
3430       val = ~ val;
3431       for (i = 0; i < (full ? 4 : 2); i++)
3432         aarch64_set_vec_u32 (cpu, vd, i, val);
3433       return;
3434 
3435     case 0xE: /* MOVI Dn, #mask64  */
3436       {
3437         uint64_t mask = 0;
3438 
3439         for (i = 0; i < 8; i++)
3440           if (val & (1 << i))
3441             mask |= (0xFFULL << (i * 8));
3442         aarch64_set_vec_u64 (cpu, vd, 0, mask);
3443         aarch64_set_vec_u64 (cpu, vd, 1, mask);
3444         return;
3445       }
3446 
3447     case 0xf: /* FMOV Vd.2D, #fpimm.  */
3448       {
3449         double u = fp_immediate_for_encoding_64 (val);
3450 
3451         if (! full)
3452           HALT_UNALLOC;
3453 
3454         aarch64_set_vec_double (cpu, vd, 0, u);
3455         aarch64_set_vec_double (cpu, vd, 1, u);
3456         return;
3457       }
3458 
3459     default:
3460       HALT_NYI;
3461     }
3462 }
3463 
3464 #define ABS(A) ((A) < 0 ? - (A) : (A))
3465 
3466 static void
3467 do_vec_ABS (sim_cpu *cpu)
3468 {
3469   /* instr[31]    = 0
3470      instr[30]    = half(0)/full(1)
3471      instr[29,24] = 00 1110
3472      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3473      instr[21,10] = 10 0000 1011 10
3474      instr[9,5]   = Vn
3475      instr[4,0]   = Vd.  */
3476 
3477   unsigned vn = INSTR (9, 5);
3478   unsigned vd = INSTR (4, 0);
3479   unsigned full = INSTR (30, 30);
3480   unsigned i;
3481 
3482   NYI_assert (29, 24, 0x0E);
3483   NYI_assert (21, 10, 0x82E);
3484 
3485   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3486   switch (INSTR (23, 22))
3487     {
3488     case 0:
3489       for (i = 0; i < (full ? 16 : 8); i++)
3490         aarch64_set_vec_s8 (cpu, vd, i,
3491                             ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3492       break;
3493 
3494     case 1:
3495       for (i = 0; i < (full ? 8 : 4); i++)
3496         aarch64_set_vec_s16 (cpu, vd, i,
3497                              ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3498       break;
3499 
3500     case 2:
3501       for (i = 0; i < (full ? 4 : 2); i++)
3502         aarch64_set_vec_s32 (cpu, vd, i,
3503                              ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3504       break;
3505 
3506     case 3:
3507       if (! full)
3508         HALT_NYI;
3509       for (i = 0; i < 2; i++)
3510         aarch64_set_vec_s64 (cpu, vd, i,
3511                              ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3512       break;
3513     }
3514 }
3515 
3516 static void
3517 do_vec_ADDV (sim_cpu *cpu)
3518 {
3519   /* instr[31]    = 0
3520      instr[30]    = full/half selector
3521      instr[29,24] = 00 1110
3522      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3523      instr[21,10] = 11 0001 1011 10
3524      instr[9,5]   = Vm
3525      instr[4,0]   = Rd.  */
3526 
3527   unsigned vm = INSTR (9, 5);
3528   unsigned rd = INSTR (4, 0);
3529   unsigned i;
3530   int full = INSTR (30, 30);
3531 
3532   NYI_assert (29, 24, 0x0E);
3533   NYI_assert (21, 10, 0xC6E);
3534 
3535   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3536   switch (INSTR (23, 22))
3537     {
3538     case 0:
3539       {
3540         uint8_t val = 0;
3541         for (i = 0; i < (full ? 16 : 8); i++)
3542           val += aarch64_get_vec_u8 (cpu, vm, i);
3543         aarch64_set_vec_u64 (cpu, rd, 0, val);
3544         return;
3545       }
3546 
3547     case 1:
3548       {
3549         uint16_t val = 0;
3550         for (i = 0; i < (full ? 8 : 4); i++)
3551           val += aarch64_get_vec_u16 (cpu, vm, i);
3552         aarch64_set_vec_u64 (cpu, rd, 0, val);
3553         return;
3554       }
3555 
3556     case 2:
3557       {
3558         uint32_t val = 0;
3559         if (! 
full) 3560 HALT_UNALLOC; 3561 for (i = 0; i < 4; i++) 3562 val += aarch64_get_vec_u32 (cpu, vm, i); 3563 aarch64_set_vec_u64 (cpu, rd, 0, val); 3564 return; 3565 } 3566 3567 case 3: 3568 HALT_UNALLOC; 3569 } 3570 } 3571 3572 static void 3573 do_vec_ins_2 (sim_cpu *cpu) 3574 { 3575 /* instr[31,21] = 01001110000 3576 instr[20,18] = size & element selector 3577 instr[17,14] = 0000 3578 instr[13] = direction: to vec(0), from vec (1) 3579 instr[12,10] = 111 3580 instr[9,5] = Vm 3581 instr[4,0] = Vd. */ 3582 3583 unsigned elem; 3584 unsigned vm = INSTR (9, 5); 3585 unsigned vd = INSTR (4, 0); 3586 3587 NYI_assert (31, 21, 0x270); 3588 NYI_assert (17, 14, 0); 3589 NYI_assert (12, 10, 7); 3590 3591 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3592 if (INSTR (13, 13) == 1) 3593 { 3594 if (INSTR (18, 18) == 1) 3595 { 3596 /* 32-bit moves. */ 3597 elem = INSTR (20, 19); 3598 aarch64_set_reg_u64 (cpu, vd, NO_SP, 3599 aarch64_get_vec_u32 (cpu, vm, elem)); 3600 } 3601 else 3602 { 3603 /* 64-bit moves. */ 3604 if (INSTR (19, 19) != 1) 3605 HALT_NYI; 3606 3607 elem = INSTR (20, 20); 3608 aarch64_set_reg_u64 (cpu, vd, NO_SP, 3609 aarch64_get_vec_u64 (cpu, vm, elem)); 3610 } 3611 } 3612 else 3613 { 3614 if (INSTR (18, 18) == 1) 3615 { 3616 /* 32-bit moves. */ 3617 elem = INSTR (20, 19); 3618 aarch64_set_vec_u32 (cpu, vd, elem, 3619 aarch64_get_reg_u32 (cpu, vm, NO_SP)); 3620 } 3621 else 3622 { 3623 /* 64-bit moves. */ 3624 if (INSTR (19, 19) != 1) 3625 HALT_NYI; 3626 3627 elem = INSTR (20, 20); 3628 aarch64_set_vec_u64 (cpu, vd, elem, 3629 aarch64_get_reg_u64 (cpu, vm, NO_SP)); 3630 } 3631 } 3632 } 3633 3634 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \ 3635 do \ 3636 { \ 3637 DST_TYPE a[N], b[N]; \ 3638 \ 3639 for (i = 0; i < (N); i++) \ 3640 { \ 3641 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \ 3642 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \ 3643 } \ 3644 for (i = 0; i < (N); i++) \ 3645 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \ 3646 } \ 3647 while (0) 3648 3649 static void 3650 do_vec_mull (sim_cpu *cpu) 3651 { 3652 /* instr[31] = 0 3653 instr[30] = lower(0)/upper(1) selector 3654 instr[29] = signed(0)/unsigned(1) 3655 instr[28,24] = 0 1110 3656 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10) 3657 instr[21] = 1 3658 instr[20,16] = Vm 3659 instr[15,10] = 11 0000 3660 instr[9,5] = Vn 3661 instr[4.0] = Vd. */ 3662 3663 int unsign = INSTR (29, 29); 3664 int bias = INSTR (30, 30); 3665 unsigned vm = INSTR (20, 16); 3666 unsigned vn = INSTR ( 9, 5); 3667 unsigned vd = INSTR ( 4, 0); 3668 unsigned i; 3669 3670 NYI_assert (28, 24, 0x0E); 3671 NYI_assert (15, 10, 0x30); 3672 3673 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3674 /* NB: Read source values before writing results, in case 3675 the source and destination vectors are the same. 
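      For example (illustrative only): UMULL V1.8H, V1.8B, V2.8B
      widens into and overwrites V1, so both operands must be captured
      into a[] and b[] before the first write to vd.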
*/ 3676 switch (INSTR (23, 22)) 3677 { 3678 case 0: 3679 if (bias) 3680 bias = 8; 3681 if (unsign) 3682 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16); 3683 else 3684 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16); 3685 return; 3686 3687 case 1: 3688 if (bias) 3689 bias = 4; 3690 if (unsign) 3691 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32); 3692 else 3693 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32); 3694 return; 3695 3696 case 2: 3697 if (bias) 3698 bias = 2; 3699 if (unsign) 3700 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64); 3701 else 3702 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64); 3703 return; 3704 3705 case 3: 3706 HALT_NYI; 3707 } 3708 } 3709 3710 static void 3711 do_vec_fadd (sim_cpu *cpu) 3712 { 3713 /* instr[31] = 0 3714 instr[30] = half(0)/full(1) 3715 instr[29,24] = 001110 3716 instr[23] = FADD(0)/FSUB(1) 3717 instr[22] = float (0)/double(1) 3718 instr[21] = 1 3719 instr[20,16] = Vm 3720 instr[15,10] = 110101 3721 instr[9,5] = Vn 3722 instr[4.0] = Vd. */ 3723 3724 unsigned vm = INSTR (20, 16); 3725 unsigned vn = INSTR (9, 5); 3726 unsigned vd = INSTR (4, 0); 3727 unsigned i; 3728 int full = INSTR (30, 30); 3729 3730 NYI_assert (29, 24, 0x0E); 3731 NYI_assert (21, 21, 1); 3732 NYI_assert (15, 10, 0x35); 3733 3734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3735 if (INSTR (23, 23)) 3736 { 3737 if (INSTR (22, 22)) 3738 { 3739 if (! full) 3740 HALT_NYI; 3741 3742 for (i = 0; i < 2; i++) 3743 aarch64_set_vec_double (cpu, vd, i, 3744 aarch64_get_vec_double (cpu, vn, i) 3745 - aarch64_get_vec_double (cpu, vm, i)); 3746 } 3747 else 3748 { 3749 for (i = 0; i < (full ? 4 : 2); i++) 3750 aarch64_set_vec_float (cpu, vd, i, 3751 aarch64_get_vec_float (cpu, vn, i) 3752 - aarch64_get_vec_float (cpu, vm, i)); 3753 } 3754 } 3755 else 3756 { 3757 if (INSTR (22, 22)) 3758 { 3759 if (! full) 3760 HALT_NYI; 3761 3762 for (i = 0; i < 2; i++) 3763 aarch64_set_vec_double (cpu, vd, i, 3764 aarch64_get_vec_double (cpu, vm, i) 3765 + aarch64_get_vec_double (cpu, vn, i)); 3766 } 3767 else 3768 { 3769 for (i = 0; i < (full ? 4 : 2); i++) 3770 aarch64_set_vec_float (cpu, vd, i, 3771 aarch64_get_vec_float (cpu, vm, i) 3772 + aarch64_get_vec_float (cpu, vn, i)); 3773 } 3774 } 3775 } 3776 3777 static void 3778 do_vec_add (sim_cpu *cpu) 3779 { 3780 /* instr[31] = 0 3781 instr[30] = full/half selector 3782 instr[29,24] = 001110 3783 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit 3784 instr[21] = 1 3785 instr[20,16] = Vn 3786 instr[15,10] = 100001 3787 instr[9,5] = Vm 3788 instr[4.0] = Vd. */ 3789 3790 unsigned vm = INSTR (20, 16); 3791 unsigned vn = INSTR (9, 5); 3792 unsigned vd = INSTR (4, 0); 3793 unsigned i; 3794 int full = INSTR (30, 30); 3795 3796 NYI_assert (29, 24, 0x0E); 3797 NYI_assert (21, 21, 1); 3798 NYI_assert (15, 10, 0x21); 3799 3800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3801 switch (INSTR (23, 22)) 3802 { 3803 case 0: 3804 for (i = 0; i < (full ? 16 : 8); i++) 3805 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) 3806 + aarch64_get_vec_u8 (cpu, vm, i)); 3807 return; 3808 3809 case 1: 3810 for (i = 0; i < (full ? 8 : 4); i++) 3811 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) 3812 + aarch64_get_vec_u16 (cpu, vm, i)); 3813 return; 3814 3815 case 2: 3816 for (i = 0; i < (full ? 4 : 2); i++) 3817 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) 3818 + aarch64_get_vec_u32 (cpu, vm, i)); 3819 return; 3820 3821 case 3: 3822 if (! 
full) 3823 HALT_UNALLOC; 3824 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0) 3825 + aarch64_get_vec_u64 (cpu, vm, 0)); 3826 aarch64_set_vec_u64 (cpu, vd, 1, 3827 aarch64_get_vec_u64 (cpu, vn, 1) 3828 + aarch64_get_vec_u64 (cpu, vm, 1)); 3829 return; 3830 } 3831 } 3832 3833 static void 3834 do_vec_mul (sim_cpu *cpu) 3835 { 3836 /* instr[31] = 0 3837 instr[30] = full/half selector 3838 instr[29,24] = 00 1110 3839 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit 3840 instr[21] = 1 3841 instr[20,16] = Vn 3842 instr[15,10] = 10 0111 3843 instr[9,5] = Vm 3844 instr[4.0] = Vd. */ 3845 3846 unsigned vm = INSTR (20, 16); 3847 unsigned vn = INSTR (9, 5); 3848 unsigned vd = INSTR (4, 0); 3849 unsigned i; 3850 int full = INSTR (30, 30); 3851 int bias = 0; 3852 3853 NYI_assert (29, 24, 0x0E); 3854 NYI_assert (21, 21, 1); 3855 NYI_assert (15, 10, 0x27); 3856 3857 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3858 switch (INSTR (23, 22)) 3859 { 3860 case 0: 3861 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8); 3862 return; 3863 3864 case 1: 3865 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16); 3866 return; 3867 3868 case 2: 3869 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32); 3870 return; 3871 3872 case 3: 3873 HALT_UNALLOC; 3874 } 3875 } 3876 3877 static void 3878 do_vec_MLA (sim_cpu *cpu) 3879 { 3880 /* instr[31] = 0 3881 instr[30] = full/half selector 3882 instr[29,24] = 00 1110 3883 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit 3884 instr[21] = 1 3885 instr[20,16] = Vn 3886 instr[15,10] = 1001 01 3887 instr[9,5] = Vm 3888 instr[4.0] = Vd. */ 3889 3890 unsigned vm = INSTR (20, 16); 3891 unsigned vn = INSTR (9, 5); 3892 unsigned vd = INSTR (4, 0); 3893 unsigned i; 3894 int full = INSTR (30, 30); 3895 3896 NYI_assert (29, 24, 0x0E); 3897 NYI_assert (21, 21, 1); 3898 NYI_assert (15, 10, 0x25); 3899 3900 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 3901 switch (INSTR (23, 22)) 3902 { 3903 case 0: 3904 for (i = 0; i < (full ? 16 : 8); i++) 3905 aarch64_set_vec_u8 (cpu, vd, i, 3906 aarch64_get_vec_u8 (cpu, vd, i) 3907 + (aarch64_get_vec_u8 (cpu, vn, i) 3908 * aarch64_get_vec_u8 (cpu, vm, i))); 3909 return; 3910 3911 case 1: 3912 for (i = 0; i < (full ? 8 : 4); i++) 3913 aarch64_set_vec_u16 (cpu, vd, i, 3914 aarch64_get_vec_u16 (cpu, vd, i) 3915 + (aarch64_get_vec_u16 (cpu, vn, i) 3916 * aarch64_get_vec_u16 (cpu, vm, i))); 3917 return; 3918 3919 case 2: 3920 for (i = 0; i < (full ? 4 : 2); i++) 3921 aarch64_set_vec_u32 (cpu, vd, i, 3922 aarch64_get_vec_u32 (cpu, vd, i) 3923 + (aarch64_get_vec_u32 (cpu, vn, i) 3924 * aarch64_get_vec_u32 (cpu, vm, i))); 3925 return; 3926 3927 default: 3928 HALT_UNALLOC; 3929 } 3930 } 3931 3932 static float 3933 fmaxnm (float a, float b) 3934 { 3935 if (! isnan (a)) 3936 { 3937 if (! isnan (b)) 3938 return a > b ? a : b; 3939 return a; 3940 } 3941 else if (! isnan (b)) 3942 return b; 3943 return a; 3944 } 3945 3946 static float 3947 fminnm (float a, float b) 3948 { 3949 if (! isnan (a)) 3950 { 3951 if (! isnan (b)) 3952 return a < b ? a : b; 3953 return a; 3954 } 3955 else if (! isnan (b)) 3956 return b; 3957 return a; 3958 } 3959 3960 static double 3961 dmaxnm (double a, double b) 3962 { 3963 if (! isnan (a)) 3964 { 3965 if (! isnan (b)) 3966 return a > b ? a : b; 3967 return a; 3968 } 3969 else if (! isnan (b)) 3970 return b; 3971 return a; 3972 } 3973 3974 static double 3975 dminnm (double a, double b) 3976 { 3977 if (! isnan (a)) 3978 { 3979 if (! isnan (b)) 3980 return a < b ? 
a : b;
3981       return a;
3982     }
3983   else if (! isnan (b))
3984     return b;
3985   return a;
3986 }
3987 
3988 static void
3989 do_vec_FminmaxNMP (sim_cpu *cpu)
3990 {
3991   /* instr [31]    = 0
3992      instr [30]    = half (0)/full (1)
3993      instr [29,24] = 10 1110
3994      instr [23]    = max(0)/min(1)
3995      instr [22]    = float (0)/double (1)
3996      instr [21]    = 1
3997      instr [20,16] = Vm
3998      instr [15,10] = 1100 01
3999      instr [9,5]   = Vn
4000      instr [4,0]   = Vd.  */
4001 
4002   unsigned vm = INSTR (20, 16);
4003   unsigned vn = INSTR (9, 5);
4004   unsigned vd = INSTR (4, 0);
4005   int full = INSTR (30, 30);
4006 
4007   NYI_assert (29, 24, 0x2E);
4008   NYI_assert (21, 21, 1);
4009   NYI_assert (15, 10, 0x31);
4010 
4011   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4012   if (INSTR (22, 22))
4013     {
4014       double (* fn)(double, double) = INSTR (23, 23)
4015         ? dminnm : dmaxnm;
4016 
4017       if (! full)
4018         HALT_NYI;
4019       aarch64_set_vec_double (cpu, vd, 0,
4020                               fn (aarch64_get_vec_double (cpu, vn, 0),
4021                                   aarch64_get_vec_double (cpu, vn, 1)));
4022       aarch64_set_vec_double (cpu, vd, 1,
4023                               fn (aarch64_get_vec_double (cpu, vm, 0),
4024                                   aarch64_get_vec_double (cpu, vm, 1)));
4025     }
4026   else
4027     {
4028       float (* fn)(float, float) = INSTR (23, 23)
4029         ? fminnm : fmaxnm;
4030 
4031       aarch64_set_vec_float (cpu, vd, 0,
4032                              fn (aarch64_get_vec_float (cpu, vn, 0),
4033                                  aarch64_get_vec_float (cpu, vn, 1)));
4034       if (full)
4035         aarch64_set_vec_float (cpu, vd, 1,
4036                                fn (aarch64_get_vec_float (cpu, vn, 2),
4037                                    aarch64_get_vec_float (cpu, vn, 3)));
4038 
4039       aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4040                              fn (aarch64_get_vec_float (cpu, vm, 0),
4041                                  aarch64_get_vec_float (cpu, vm, 1)));
4042       if (full)
4043         aarch64_set_vec_float (cpu, vd, 3,
4044                                fn (aarch64_get_vec_float (cpu, vm, 2),
4045                                    aarch64_get_vec_float (cpu, vm, 3)));
4046     }
4047 }
4048 
4049 static void
4050 do_vec_AND (sim_cpu *cpu)
4051 {
4052   /* instr[31]    = 0
4053      instr[30]    = half (0)/full (1)
4054      instr[29,21] = 001110001
4055      instr[20,16] = Vm
4056      instr[15,10] = 000111
4057      instr[9,5]   = Vn
4058      instr[4,0]   = Vd.  */
4059 
4060   unsigned vm = INSTR (20, 16);
4061   unsigned vn = INSTR (9, 5);
4062   unsigned vd = INSTR (4, 0);
4063   unsigned i;
4064   int full = INSTR (30, 30);
4065 
4066   NYI_assert (29, 21, 0x071);
4067   NYI_assert (15, 10, 0x07);
4068 
4069   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4070   for (i = 0; i < (full ? 4 : 2); i++)
4071     aarch64_set_vec_u32 (cpu, vd, i,
4072                          aarch64_get_vec_u32 (cpu, vn, i)
4073                          & aarch64_get_vec_u32 (cpu, vm, i));
4074 }
4075 
4076 static void
4077 do_vec_BSL (sim_cpu *cpu)
4078 {
4079   /* instr[31]    = 0
4080      instr[30]    = half (0)/full (1)
4081      instr[29,21] = 101110011
4082      instr[20,16] = Vm
4083      instr[15,10] = 000111
4084      instr[9,5]   = Vn
4085      instr[4,0]   = Vd.  */
4086 
4087   unsigned vm = INSTR (20, 16);
4088   unsigned vn = INSTR (9, 5);
4089   unsigned vd = INSTR (4, 0);
4090   unsigned i;
4091   int full = INSTR (30, 30);
4092 
4093   NYI_assert (29, 21, 0x173);
4094   NYI_assert (15, 10, 0x07);
4095 
4096   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4097   for (i = 0; i < (full ? 16 : 8); i++)
4098     aarch64_set_vec_u8 (cpu, vd, i,
4099                         (  aarch64_get_vec_u8 (cpu, vd, i)
4100                          & aarch64_get_vec_u8 (cpu, vn, i))
4101                         | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4102                            & aarch64_get_vec_u8 (cpu, vm, i)));
4103 }
4104 
4105 static void
4106 do_vec_EOR (sim_cpu *cpu)
4107 {
4108   /* instr[31]    = 0
4109      instr[30]    = half (0)/full (1)
4110      instr[29,21] = 10 1110 001
4111      instr[20,16] = Vm
4112      instr[15,10] = 000111
4113      instr[9,5]   = Vn
4114      instr[4,0]   = Vd.
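     For example (illustrative only): EOR V0.16B, V1.16B, V1.16B
     zeroes V0; the loop below performs the operation in 32-bit
     chunks, which is bitwise-equivalent for any element size.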
*/ 4115 4116 unsigned vm = INSTR (20, 16); 4117 unsigned vn = INSTR (9, 5); 4118 unsigned vd = INSTR (4, 0); 4119 unsigned i; 4120 int full = INSTR (30, 30); 4121 4122 NYI_assert (29, 21, 0x171); 4123 NYI_assert (15, 10, 0x07); 4124 4125 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4126 for (i = 0; i < (full ? 4 : 2); i++) 4127 aarch64_set_vec_u32 (cpu, vd, i, 4128 aarch64_get_vec_u32 (cpu, vn, i) 4129 ^ aarch64_get_vec_u32 (cpu, vm, i)); 4130 } 4131 4132 static void 4133 do_vec_bit (sim_cpu *cpu) 4134 { 4135 /* instr[31] = 0 4136 instr[30] = half (0)/full (1) 4137 instr[29,23] = 10 1110 1 4138 instr[22] = BIT (0) / BIF (1) 4139 instr[21] = 1 4140 instr[20,16] = Vm 4141 instr[15,10] = 0001 11 4142 instr[9,5] = Vn 4143 instr[4.0] = Vd. */ 4144 4145 unsigned vm = INSTR (20, 16); 4146 unsigned vn = INSTR (9, 5); 4147 unsigned vd = INSTR (4, 0); 4148 unsigned full = INSTR (30, 30); 4149 unsigned test_false = INSTR (22, 22); 4150 unsigned i; 4151 4152 NYI_assert (29, 23, 0x5D); 4153 NYI_assert (21, 21, 1); 4154 NYI_assert (15, 10, 0x07); 4155 4156 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4157 for (i = 0; i < (full ? 4 : 2); i++) 4158 { 4159 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i); 4160 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i); 4161 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i); 4162 if (test_false) 4163 aarch64_set_vec_u32 (cpu, vd, i, 4164 (vd_val & vm_val) | (vn_val & ~vm_val)); 4165 else 4166 aarch64_set_vec_u32 (cpu, vd, i, 4167 (vd_val & ~vm_val) | (vn_val & vm_val)); 4168 } 4169 } 4170 4171 static void 4172 do_vec_ORN (sim_cpu *cpu) 4173 { 4174 /* instr[31] = 0 4175 instr[30] = half (0)/full (1) 4176 instr[29,21] = 00 1110 111 4177 instr[20,16] = Vm 4178 instr[15,10] = 00 0111 4179 instr[9,5] = Vn 4180 instr[4.0] = Vd. */ 4181 4182 unsigned vm = INSTR (20, 16); 4183 unsigned vn = INSTR (9, 5); 4184 unsigned vd = INSTR (4, 0); 4185 unsigned i; 4186 int full = INSTR (30, 30); 4187 4188 NYI_assert (29, 21, 0x077); 4189 NYI_assert (15, 10, 0x07); 4190 4191 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4192 for (i = 0; i < (full ? 16 : 8); i++) 4193 aarch64_set_vec_u8 (cpu, vd, i, 4194 aarch64_get_vec_u8 (cpu, vn, i) 4195 | ~ aarch64_get_vec_u8 (cpu, vm, i)); 4196 } 4197 4198 static void 4199 do_vec_ORR (sim_cpu *cpu) 4200 { 4201 /* instr[31] = 0 4202 instr[30] = half (0)/full (1) 4203 instr[29,21] = 00 1110 101 4204 instr[20,16] = Vm 4205 instr[15,10] = 0001 11 4206 instr[9,5] = Vn 4207 instr[4.0] = Vd. */ 4208 4209 unsigned vm = INSTR (20, 16); 4210 unsigned vn = INSTR (9, 5); 4211 unsigned vd = INSTR (4, 0); 4212 unsigned i; 4213 int full = INSTR (30, 30); 4214 4215 NYI_assert (29, 21, 0x075); 4216 NYI_assert (15, 10, 0x07); 4217 4218 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4219 for (i = 0; i < (full ? 16 : 8); i++) 4220 aarch64_set_vec_u8 (cpu, vd, i, 4221 aarch64_get_vec_u8 (cpu, vn, i) 4222 | aarch64_get_vec_u8 (cpu, vm, i)); 4223 } 4224 4225 static void 4226 do_vec_BIC (sim_cpu *cpu) 4227 { 4228 /* instr[31] = 0 4229 instr[30] = half (0)/full (1) 4230 instr[29,21] = 00 1110 011 4231 instr[20,16] = Vm 4232 instr[15,10] = 00 0111 4233 instr[9,5] = Vn 4234 instr[4.0] = Vd. */ 4235 4236 unsigned vm = INSTR (20, 16); 4237 unsigned vn = INSTR (9, 5); 4238 unsigned vd = INSTR (4, 0); 4239 unsigned i; 4240 int full = INSTR (30, 30); 4241 4242 NYI_assert (29, 21, 0x073); 4243 NYI_assert (15, 10, 0x07); 4244 4245 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4246 for (i = 0; i < (full ? 
16 : 8); i++) 4247 aarch64_set_vec_u8 (cpu, vd, i, 4248 aarch64_get_vec_u8 (cpu, vn, i) 4249 & ~ aarch64_get_vec_u8 (cpu, vm, i)); 4250 } 4251 4252 static void 4253 do_vec_XTN (sim_cpu *cpu) 4254 { 4255 /* instr[31] = 0 4256 instr[30] = first part (0)/ second part (1) 4257 instr[29,24] = 00 1110 4258 instr[23,22] = size: byte(00), half(01), word (10) 4259 instr[21,10] = 1000 0100 1010 4260 instr[9,5] = Vs 4261 instr[4,0] = Vd. */ 4262 4263 unsigned vs = INSTR (9, 5); 4264 unsigned vd = INSTR (4, 0); 4265 unsigned bias = INSTR (30, 30); 4266 unsigned i; 4267 4268 NYI_assert (29, 24, 0x0E); 4269 NYI_assert (21, 10, 0x84A); 4270 4271 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4272 switch (INSTR (23, 22)) 4273 { 4274 case 0: 4275 for (i = 0; i < 8; i++) 4276 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8), 4277 aarch64_get_vec_u16 (cpu, vs, i)); 4278 return; 4279 4280 case 1: 4281 for (i = 0; i < 4; i++) 4282 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4), 4283 aarch64_get_vec_u32 (cpu, vs, i)); 4284 return; 4285 4286 case 2: 4287 for (i = 0; i < 2; i++) 4288 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2), 4289 aarch64_get_vec_u64 (cpu, vs, i)); 4290 return; 4291 } 4292 } 4293 4294 /* Return the number of bits set in the input value. */ 4295 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) 4296 # define popcount __builtin_popcount 4297 #else 4298 static int 4299 popcount (unsigned char x) 4300 { 4301 static const unsigned char popcnt[16] = 4302 { 4303 0, 1, 1, 2, 4304 1, 2, 2, 3, 4305 1, 2, 2, 3, 4306 2, 3, 3, 4 4307 }; 4308 4309 /* Only counts the low 8 bits of the input as that is all we need. */ 4310 return popcnt[x % 16] + popcnt[x / 16]; 4311 } 4312 #endif 4313 4314 static void 4315 do_vec_CNT (sim_cpu *cpu) 4316 { 4317 /* instr[31] = 0 4318 instr[30] = half (0)/ full (1) 4319 instr[29,24] = 00 1110 4320 instr[23,22] = size: byte(00) 4321 instr[21,10] = 1000 0001 0110 4322 instr[9,5] = Vs 4323 instr[4,0] = Vd. */ 4324 4325 unsigned vs = INSTR (9, 5); 4326 unsigned vd = INSTR (4, 0); 4327 int full = INSTR (30, 30); 4328 int size = INSTR (23, 22); 4329 int i; 4330 4331 NYI_assert (29, 24, 0x0E); 4332 NYI_assert (21, 10, 0x816); 4333 4334 if (size != 0) 4335 HALT_UNALLOC; 4336 4337 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4338 4339 for (i = 0; i < (full ? 16 : 8); i++) 4340 aarch64_set_vec_u8 (cpu, vd, i, 4341 popcount (aarch64_get_vec_u8 (cpu, vs, i))); 4342 } 4343 4344 static void 4345 do_vec_maxv (sim_cpu *cpu) 4346 { 4347 /* instr[31] = 0 4348 instr[30] = half(0)/full(1) 4349 instr[29] = signed (0)/unsigned(1) 4350 instr[28,24] = 0 1110 4351 instr[23,22] = size: byte(00), half(01), word (10) 4352 instr[21] = 1 4353 instr[20,17] = 1 000 4354 instr[16] = max(0)/min(1) 4355 instr[15,10] = 1010 10 4356 instr[9,5] = V source 4357 instr[4.0] = R dest. */ 4358 4359 unsigned vs = INSTR (9, 5); 4360 unsigned rd = INSTR (4, 0); 4361 unsigned full = INSTR (30, 30); 4362 unsigned i; 4363 4364 NYI_assert (28, 24, 0x0E); 4365 NYI_assert (21, 21, 1); 4366 NYI_assert (20, 17, 8); 4367 NYI_assert (15, 10, 0x2A); 4368 4369 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4370 switch ((INSTR (29, 29) << 1) | INSTR (16, 16)) 4371 { 4372 case 0: /* SMAXV. */ 4373 { 4374 int64_t smax; 4375 switch (INSTR (23, 22)) 4376 { 4377 case 0: 4378 smax = aarch64_get_vec_s8 (cpu, vs, 0); 4379 for (i = 1; i < (full ? 16 : 8); i++) 4380 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i)); 4381 break; 4382 case 1: 4383 smax = aarch64_get_vec_s16 (cpu, vs, 0); 4384 for (i = 1; i < (full ? 
8 : 4); i++) 4385 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i)); 4386 break; 4387 case 2: 4388 smax = aarch64_get_vec_s32 (cpu, vs, 0); 4389 for (i = 1; i < (full ? 4 : 2); i++) 4390 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i)); 4391 break; 4392 case 3: 4393 HALT_UNALLOC; 4394 } 4395 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax); 4396 return; 4397 } 4398 4399 case 1: /* SMINV. */ 4400 { 4401 int64_t smin; 4402 switch (INSTR (23, 22)) 4403 { 4404 case 0: 4405 smin = aarch64_get_vec_s8 (cpu, vs, 0); 4406 for (i = 1; i < (full ? 16 : 8); i++) 4407 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i)); 4408 break; 4409 case 1: 4410 smin = aarch64_get_vec_s16 (cpu, vs, 0); 4411 for (i = 1; i < (full ? 8 : 4); i++) 4412 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i)); 4413 break; 4414 case 2: 4415 smin = aarch64_get_vec_s32 (cpu, vs, 0); 4416 for (i = 1; i < (full ? 4 : 2); i++) 4417 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i)); 4418 break; 4419 4420 case 3: 4421 HALT_UNALLOC; 4422 } 4423 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin); 4424 return; 4425 } 4426 4427 case 2: /* UMAXV. */ 4428 { 4429 uint64_t umax; 4430 switch (INSTR (23, 22)) 4431 { 4432 case 0: 4433 umax = aarch64_get_vec_u8 (cpu, vs, 0); 4434 for (i = 1; i < (full ? 16 : 8); i++) 4435 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i)); 4436 break; 4437 case 1: 4438 umax = aarch64_get_vec_u16 (cpu, vs, 0); 4439 for (i = 1; i < (full ? 8 : 4); i++) 4440 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i)); 4441 break; 4442 case 2: 4443 umax = aarch64_get_vec_u32 (cpu, vs, 0); 4444 for (i = 1; i < (full ? 4 : 2); i++) 4445 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i)); 4446 break; 4447 4448 case 3: 4449 HALT_UNALLOC; 4450 } 4451 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax); 4452 return; 4453 } 4454 4455 case 3: /* UMINV. */ 4456 { 4457 uint64_t umin; 4458 switch (INSTR (23, 22)) 4459 { 4460 case 0: 4461 umin = aarch64_get_vec_u8 (cpu, vs, 0); 4462 for (i = 1; i < (full ? 16 : 8); i++) 4463 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i)); 4464 break; 4465 case 1: 4466 umin = aarch64_get_vec_u16 (cpu, vs, 0); 4467 for (i = 1; i < (full ? 8 : 4); i++) 4468 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i)); 4469 break; 4470 case 2: 4471 umin = aarch64_get_vec_u32 (cpu, vs, 0); 4472 for (i = 1; i < (full ? 4 : 2); i++) 4473 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i)); 4474 break; 4475 4476 case 3: 4477 HALT_UNALLOC; 4478 } 4479 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin); 4480 return; 4481 } 4482 } 4483 } 4484 4485 static void 4486 do_vec_fminmaxV (sim_cpu *cpu) 4487 { 4488 /* instr[31,24] = 0110 1110 4489 instr[23] = max(0)/min(1) 4490 instr[22,14] = 011 0000 11 4491 instr[13,12] = nm(00)/normal(11) 4492 instr[11,10] = 10 4493 instr[9,5] = V source 4494 instr[4.0] = R dest. */ 4495 4496 unsigned vs = INSTR (9, 5); 4497 unsigned rd = INSTR (4, 0); 4498 unsigned i; 4499 float res = aarch64_get_vec_float (cpu, vs, 0); 4500 4501 NYI_assert (31, 24, 0x6E); 4502 NYI_assert (22, 14, 0x0C3); 4503 NYI_assert (11, 10, 2); 4504 4505 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4506 if (INSTR (23, 23)) 4507 { 4508 switch (INSTR (13, 12)) 4509 { 4510 case 0: /* FMNINNMV. */ 4511 for (i = 1; i < 4; i++) 4512 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i)); 4513 break; 4514 4515 case 3: /* FMINV. 
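 Architecturally FMINV propagates
		     NaNs (any NaN lane makes the whole reduction NaN),
		     unlike the "NM" form above which prefers the
		     numeric operand; the generic min macro used below
		     only approximates the NaN cases.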
 */
	  for (i = 1; i < 4; i++)
	    res = min (res, aarch64_get_vec_float (cpu, vs, i));
	  break;

	default:
	  HALT_NYI;
	}
    }
  else
    {
      switch (INSTR (13, 12))
	{
	case 0: /* FMAXNMV.  */
	  for (i = 1; i < 4; i++)
	    res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
	  break;

	case 3: /* FMAXV.  */
	  for (i = 1; i < 4; i++)
	    res = max (res, aarch64_get_vec_float (cpu, vs, i));
	  break;

	default:
	  HALT_NYI;
	}
    }

  aarch64_set_FP_float (cpu, rd, res);
}

static void
do_vec_Fminmax (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = half(0)/full(1)
     instr[29,24] = 00 1110
     instr[23]    = max(0)/min(1)
     instr[22]    = float(0)/double(1)
     instr[21]    = 1
     instr[20,16] = Vm
     instr[15,14] = 11
     instr[13,12] = nm(00)/normal(11)
     instr[11,10] = 01
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);
  unsigned min = INSTR (23, 23);
  unsigned i;

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 14, 3);
  NYI_assert (11, 10, 1);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (INSTR (22, 22))
    {
      double (* func)(double, double);

      if (! full)
	HALT_NYI;

      if (INSTR (13, 12) == 0)
	func = min ? dminnm : dmaxnm;
      else if (INSTR (13, 12) == 3)
	func = min ? fmin : fmax;
      else
	HALT_NYI;

      for (i = 0; i < 2; i++)
	aarch64_set_vec_double (cpu, vd, i,
				func (aarch64_get_vec_double (cpu, vn, i),
				      aarch64_get_vec_double (cpu, vm, i)));
    }
  else
    {
      float (* func)(float, float);

      if (INSTR (13, 12) == 0)
	func = min ? fminnm : fmaxnm;
      else if (INSTR (13, 12) == 3)
	func = min ? fminf : fmaxf;
      else
	HALT_NYI;

      for (i = 0; i < (full ? 4 : 2); i++)
	aarch64_set_vec_float (cpu, vd, i,
			       func (aarch64_get_vec_float (cpu, vn, i),
				     aarch64_get_vec_float (cpu, vm, i)));
    }
}

static void
do_vec_SCVTF (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = Q
     instr[29,23] = 00 1110 0
     instr[22]    = float(0)/double(1)
     instr[21,10] = 10 0001 1101 10
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */

  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned full = INSTR (30, 30);
  unsigned size = INSTR (22, 22);
  unsigned i;

  NYI_assert (29, 23, 0x1C);
  NYI_assert (21, 10, 0x876);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (size)
    {
      if (! full)
	HALT_UNALLOC;

      for (i = 0; i < 2; i++)
	{
	  /* SCVTF converts signed integers, so read the lane signed.  */
	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
	  aarch64_set_vec_double (cpu, vd, i, val);
	}
    }
  else
    {
      for (i = 0; i < (full ? 4 : 2); i++)
	{
	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
	  aarch64_set_vec_float (cpu, vd, i, val);
	}
    }
}

#define VEC_CMP(SOURCE, CMP)						\
  do									\
    {									\
      switch (size)							\
	{								\
	case 0:								\
	  for (i = 0; i < (full ? 16 : 8); i++)				\
	    aarch64_set_vec_u8 (cpu, vd, i,				\
				aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
				CMP					\
				aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
				? -1 : 0);				\
	  return;							\
	case 1:								\
	  for (i = 0; i < (full ?
8 : 4); i++) \ 4669 aarch64_set_vec_u16 (cpu, vd, i, \ 4670 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \ 4671 CMP \ 4672 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \ 4673 ? -1 : 0); \ 4674 return; \ 4675 case 2: \ 4676 for (i = 0; i < (full ? 4 : 2); i++) \ 4677 aarch64_set_vec_u32 (cpu, vd, i, \ 4678 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \ 4679 CMP \ 4680 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \ 4681 ? -1 : 0); \ 4682 return; \ 4683 case 3: \ 4684 if (! full) \ 4685 HALT_UNALLOC; \ 4686 for (i = 0; i < 2; i++) \ 4687 aarch64_set_vec_u64 (cpu, vd, i, \ 4688 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \ 4689 CMP \ 4690 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \ 4691 ? -1ULL : 0); \ 4692 return; \ 4693 } \ 4694 } \ 4695 while (0) 4696 4697 #define VEC_CMP0(SOURCE, CMP) \ 4698 do \ 4699 { \ 4700 switch (size) \ 4701 { \ 4702 case 0: \ 4703 for (i = 0; i < (full ? 16 : 8); i++) \ 4704 aarch64_set_vec_u8 (cpu, vd, i, \ 4705 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \ 4706 CMP 0 ? -1 : 0); \ 4707 return; \ 4708 case 1: \ 4709 for (i = 0; i < (full ? 8 : 4); i++) \ 4710 aarch64_set_vec_u16 (cpu, vd, i, \ 4711 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \ 4712 CMP 0 ? -1 : 0); \ 4713 return; \ 4714 case 2: \ 4715 for (i = 0; i < (full ? 4 : 2); i++) \ 4716 aarch64_set_vec_u32 (cpu, vd, i, \ 4717 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \ 4718 CMP 0 ? -1 : 0); \ 4719 return; \ 4720 case 3: \ 4721 if (! full) \ 4722 HALT_UNALLOC; \ 4723 for (i = 0; i < 2; i++) \ 4724 aarch64_set_vec_u64 (cpu, vd, i, \ 4725 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \ 4726 CMP 0 ? -1ULL : 0); \ 4727 return; \ 4728 } \ 4729 } \ 4730 while (0) 4731 4732 #define VEC_FCMP0(CMP) \ 4733 do \ 4734 { \ 4735 if (vm != 0) \ 4736 HALT_NYI; \ 4737 if (INSTR (22, 22)) \ 4738 { \ 4739 if (! full) \ 4740 HALT_NYI; \ 4741 for (i = 0; i < 2; i++) \ 4742 aarch64_set_vec_u64 (cpu, vd, i, \ 4743 aarch64_get_vec_double (cpu, vn, i) \ 4744 CMP 0.0 ? -1 : 0); \ 4745 } \ 4746 else \ 4747 { \ 4748 for (i = 0; i < (full ? 4 : 2); i++) \ 4749 aarch64_set_vec_u32 (cpu, vd, i, \ 4750 aarch64_get_vec_float (cpu, vn, i) \ 4751 CMP 0.0 ? -1 : 0); \ 4752 } \ 4753 return; \ 4754 } \ 4755 while (0) 4756 4757 #define VEC_FCMP(CMP) \ 4758 do \ 4759 { \ 4760 if (INSTR (22, 22)) \ 4761 { \ 4762 if (! full) \ 4763 HALT_NYI; \ 4764 for (i = 0; i < 2; i++) \ 4765 aarch64_set_vec_u64 (cpu, vd, i, \ 4766 aarch64_get_vec_double (cpu, vn, i) \ 4767 CMP \ 4768 aarch64_get_vec_double (cpu, vm, i) \ 4769 ? -1 : 0); \ 4770 } \ 4771 else \ 4772 { \ 4773 for (i = 0; i < (full ? 4 : 2); i++) \ 4774 aarch64_set_vec_u32 (cpu, vd, i, \ 4775 aarch64_get_vec_float (cpu, vn, i) \ 4776 CMP \ 4777 aarch64_get_vec_float (cpu, vm, i) \ 4778 ? -1 : 0); \ 4779 } \ 4780 return; \ 4781 } \ 4782 while (0) 4783 4784 static void 4785 do_vec_compare (sim_cpu *cpu) 4786 { 4787 /* instr[31] = 0 4788 instr[30] = half(0)/full(1) 4789 instr[29] = part-of-comparison-type 4790 instr[28,24] = 0 1110 4791 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11) 4792 type of float compares: single (-0) / double (-1) 4793 instr[21] = 1 4794 instr[20,16] = Vm or 00000 (compare vs 0) 4795 instr[15,10] = part-of-comparison-type 4796 instr[9,5] = Vn 4797 instr[4.0] = Vd. 
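
     Each comparison writes an all-ones mask into the destination lane
     when it succeeds and all-zeros when it fails; e.g. for 16-bit
     lanes, 3 > 1 stores 0xFFFF while 1 > 3 stores 0x0000.  That is
     why the helper macros below store (cond ? -1 : 0) through the
     unsigned lane setters.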
*/ 4798 4799 int full = INSTR (30, 30); 4800 int size = INSTR (23, 22); 4801 unsigned vm = INSTR (20, 16); 4802 unsigned vn = INSTR (9, 5); 4803 unsigned vd = INSTR (4, 0); 4804 unsigned i; 4805 4806 NYI_assert (28, 24, 0x0E); 4807 NYI_assert (21, 21, 1); 4808 4809 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4810 if ((INSTR (11, 11) 4811 && INSTR (14, 14)) 4812 || ((INSTR (11, 11) == 0 4813 && INSTR (10, 10) == 0))) 4814 { 4815 /* A compare vs 0. */ 4816 if (vm != 0) 4817 { 4818 if (INSTR (15, 10) == 0x2A) 4819 do_vec_maxv (cpu); 4820 else if (INSTR (15, 10) == 0x32 4821 || INSTR (15, 10) == 0x3E) 4822 do_vec_fminmaxV (cpu); 4823 else if (INSTR (29, 23) == 0x1C 4824 && INSTR (21, 10) == 0x876) 4825 do_vec_SCVTF (cpu); 4826 else 4827 HALT_NYI; 4828 return; 4829 } 4830 } 4831 4832 if (INSTR (14, 14)) 4833 { 4834 /* A floating point compare. */ 4835 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4) 4836 | INSTR (13, 10); 4837 4838 NYI_assert (15, 15, 1); 4839 4840 switch (decode) 4841 { 4842 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>); 4843 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=); 4844 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==); 4845 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=); 4846 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<); 4847 case /* 0b111001: GT */ 0x39: VEC_FCMP (>); 4848 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=); 4849 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==); 4850 4851 default: 4852 HALT_NYI; 4853 } 4854 } 4855 else 4856 { 4857 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10); 4858 4859 switch (decode) 4860 { 4861 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > ); 4862 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= ); 4863 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > ); 4864 case 0x23: /* 0100011 TST */ VEC_CMP (u, & ); 4865 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == ); 4866 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < ); 4867 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > ); 4868 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= ); 4869 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= ); 4870 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == ); 4871 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= ); 4872 default: 4873 if (vm == 0) 4874 HALT_NYI; 4875 do_vec_maxv (cpu); 4876 } 4877 } 4878 } 4879 4880 static void 4881 do_vec_SSHL (sim_cpu *cpu) 4882 { 4883 /* instr[31] = 0 4884 instr[30] = first part (0)/ second part (1) 4885 instr[29,24] = 00 1110 4886 instr[23,22] = size: byte(00), half(01), word (10), long (11) 4887 instr[21] = 1 4888 instr[20,16] = Vm 4889 instr[15,10] = 0100 01 4890 instr[9,5] = Vn 4891 instr[4,0] = Vd. */ 4892 4893 unsigned full = INSTR (30, 30); 4894 unsigned vm = INSTR (20, 16); 4895 unsigned vn = INSTR (9, 5); 4896 unsigned vd = INSTR (4, 0); 4897 unsigned i; 4898 signed int shift; 4899 4900 NYI_assert (29, 24, 0x0E); 4901 NYI_assert (21, 21, 1); 4902 NYI_assert (15, 10, 0x11); 4903 4904 /* FIXME: What is a signed shift left in this context ?. */ 4905 4906 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4907 switch (INSTR (23, 22)) 4908 { 4909 case 0: 4910 for (i = 0; i < (full ? 16 : 8); i++) 4911 { 4912 shift = aarch64_get_vec_s8 (cpu, vm, i); 4913 if (shift >= 0) 4914 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) 4915 << shift); 4916 else 4917 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) 4918 >> - shift); 4919 } 4920 return; 4921 4922 case 1: 4923 for (i = 0; i < (full ? 
8 : 4); i++) 4924 { 4925 shift = aarch64_get_vec_s8 (cpu, vm, i * 2); 4926 if (shift >= 0) 4927 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) 4928 << shift); 4929 else 4930 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) 4931 >> - shift); 4932 } 4933 return; 4934 4935 case 2: 4936 for (i = 0; i < (full ? 4 : 2); i++) 4937 { 4938 shift = aarch64_get_vec_s8 (cpu, vm, i * 4); 4939 if (shift >= 0) 4940 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) 4941 << shift); 4942 else 4943 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) 4944 >> - shift); 4945 } 4946 return; 4947 4948 case 3: 4949 if (! full) 4950 HALT_UNALLOC; 4951 for (i = 0; i < 2; i++) 4952 { 4953 shift = aarch64_get_vec_s8 (cpu, vm, i * 8); 4954 if (shift >= 0) 4955 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) 4956 << shift); 4957 else 4958 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) 4959 >> - shift); 4960 } 4961 return; 4962 } 4963 } 4964 4965 static void 4966 do_vec_USHL (sim_cpu *cpu) 4967 { 4968 /* instr[31] = 0 4969 instr[30] = first part (0)/ second part (1) 4970 instr[29,24] = 10 1110 4971 instr[23,22] = size: byte(00), half(01), word (10), long (11) 4972 instr[21] = 1 4973 instr[20,16] = Vm 4974 instr[15,10] = 0100 01 4975 instr[9,5] = Vn 4976 instr[4,0] = Vd */ 4977 4978 unsigned full = INSTR (30, 30); 4979 unsigned vm = INSTR (20, 16); 4980 unsigned vn = INSTR (9, 5); 4981 unsigned vd = INSTR (4, 0); 4982 unsigned i; 4983 signed int shift; 4984 4985 NYI_assert (29, 24, 0x2E); 4986 NYI_assert (15, 10, 0x11); 4987 4988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 4989 switch (INSTR (23, 22)) 4990 { 4991 case 0: 4992 for (i = 0; i < (full ? 16 : 8); i++) 4993 { 4994 shift = aarch64_get_vec_s8 (cpu, vm, i); 4995 if (shift >= 0) 4996 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) 4997 << shift); 4998 else 4999 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) 5000 >> - shift); 5001 } 5002 return; 5003 5004 case 1: 5005 for (i = 0; i < (full ? 8 : 4); i++) 5006 { 5007 shift = aarch64_get_vec_s8 (cpu, vm, i * 2); 5008 if (shift >= 0) 5009 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) 5010 << shift); 5011 else 5012 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) 5013 >> - shift); 5014 } 5015 return; 5016 5017 case 2: 5018 for (i = 0; i < (full ? 4 : 2); i++) 5019 { 5020 shift = aarch64_get_vec_s8 (cpu, vm, i * 4); 5021 if (shift >= 0) 5022 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) 5023 << shift); 5024 else 5025 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) 5026 >> - shift); 5027 } 5028 return; 5029 5030 case 3: 5031 if (! full) 5032 HALT_UNALLOC; 5033 for (i = 0; i < 2; i++) 5034 { 5035 shift = aarch64_get_vec_s8 (cpu, vm, i * 8); 5036 if (shift >= 0) 5037 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) 5038 << shift); 5039 else 5040 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) 5041 >> - shift); 5042 } 5043 return; 5044 } 5045 } 5046 5047 static void 5048 do_vec_FMLA (sim_cpu *cpu) 5049 { 5050 /* instr[31] = 0 5051 instr[30] = full/half selector 5052 instr[29,23] = 0011100 5053 instr[22] = size: 0=>float, 1=>double 5054 instr[21] = 1 5055 instr[20,16] = Vn 5056 instr[15,10] = 1100 11 5057 instr[9,5] = Vm 5058 instr[4.0] = Vd. 
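
     Note: the loops below round twice (once after the multiply, once
     after the add), whereas hardware FMLA fuses both steps with a
     single rounding, so results can differ in the last ULP.  A sketch
     of a host-accurate alternative using C99 fma() from math.h, with
     n, m and d standing for one lane of Vn, Vm and Vd:

       d = fma (n, m, d);   (one rounding, as in hardware)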
*/ 5059 5060 unsigned vm = INSTR (20, 16); 5061 unsigned vn = INSTR (9, 5); 5062 unsigned vd = INSTR (4, 0); 5063 unsigned i; 5064 int full = INSTR (30, 30); 5065 5066 NYI_assert (29, 23, 0x1C); 5067 NYI_assert (21, 21, 1); 5068 NYI_assert (15, 10, 0x33); 5069 5070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5071 if (INSTR (22, 22)) 5072 { 5073 if (! full) 5074 HALT_UNALLOC; 5075 for (i = 0; i < 2; i++) 5076 aarch64_set_vec_double (cpu, vd, i, 5077 aarch64_get_vec_double (cpu, vn, i) * 5078 aarch64_get_vec_double (cpu, vm, i) + 5079 aarch64_get_vec_double (cpu, vd, i)); 5080 } 5081 else 5082 { 5083 for (i = 0; i < (full ? 4 : 2); i++) 5084 aarch64_set_vec_float (cpu, vd, i, 5085 aarch64_get_vec_float (cpu, vn, i) * 5086 aarch64_get_vec_float (cpu, vm, i) + 5087 aarch64_get_vec_float (cpu, vd, i)); 5088 } 5089 } 5090 5091 static void 5092 do_vec_max (sim_cpu *cpu) 5093 { 5094 /* instr[31] = 0 5095 instr[30] = full/half selector 5096 instr[29] = SMAX (0) / UMAX (1) 5097 instr[28,24] = 0 1110 5098 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit 5099 instr[21] = 1 5100 instr[20,16] = Vn 5101 instr[15,10] = 0110 01 5102 instr[9,5] = Vm 5103 instr[4.0] = Vd. */ 5104 5105 unsigned vm = INSTR (20, 16); 5106 unsigned vn = INSTR (9, 5); 5107 unsigned vd = INSTR (4, 0); 5108 unsigned i; 5109 int full = INSTR (30, 30); 5110 5111 NYI_assert (28, 24, 0x0E); 5112 NYI_assert (21, 21, 1); 5113 NYI_assert (15, 10, 0x19); 5114 5115 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5116 if (INSTR (29, 29)) 5117 { 5118 switch (INSTR (23, 22)) 5119 { 5120 case 0: 5121 for (i = 0; i < (full ? 16 : 8); i++) 5122 aarch64_set_vec_u8 (cpu, vd, i, 5123 aarch64_get_vec_u8 (cpu, vn, i) 5124 > aarch64_get_vec_u8 (cpu, vm, i) 5125 ? aarch64_get_vec_u8 (cpu, vn, i) 5126 : aarch64_get_vec_u8 (cpu, vm, i)); 5127 return; 5128 5129 case 1: 5130 for (i = 0; i < (full ? 8 : 4); i++) 5131 aarch64_set_vec_u16 (cpu, vd, i, 5132 aarch64_get_vec_u16 (cpu, vn, i) 5133 > aarch64_get_vec_u16 (cpu, vm, i) 5134 ? aarch64_get_vec_u16 (cpu, vn, i) 5135 : aarch64_get_vec_u16 (cpu, vm, i)); 5136 return; 5137 5138 case 2: 5139 for (i = 0; i < (full ? 4 : 2); i++) 5140 aarch64_set_vec_u32 (cpu, vd, i, 5141 aarch64_get_vec_u32 (cpu, vn, i) 5142 > aarch64_get_vec_u32 (cpu, vm, i) 5143 ? aarch64_get_vec_u32 (cpu, vn, i) 5144 : aarch64_get_vec_u32 (cpu, vm, i)); 5145 return; 5146 5147 case 3: 5148 HALT_UNALLOC; 5149 } 5150 } 5151 else 5152 { 5153 switch (INSTR (23, 22)) 5154 { 5155 case 0: 5156 for (i = 0; i < (full ? 16 : 8); i++) 5157 aarch64_set_vec_s8 (cpu, vd, i, 5158 aarch64_get_vec_s8 (cpu, vn, i) 5159 > aarch64_get_vec_s8 (cpu, vm, i) 5160 ? aarch64_get_vec_s8 (cpu, vn, i) 5161 : aarch64_get_vec_s8 (cpu, vm, i)); 5162 return; 5163 5164 case 1: 5165 for (i = 0; i < (full ? 8 : 4); i++) 5166 aarch64_set_vec_s16 (cpu, vd, i, 5167 aarch64_get_vec_s16 (cpu, vn, i) 5168 > aarch64_get_vec_s16 (cpu, vm, i) 5169 ? aarch64_get_vec_s16 (cpu, vn, i) 5170 : aarch64_get_vec_s16 (cpu, vm, i)); 5171 return; 5172 5173 case 2: 5174 for (i = 0; i < (full ? 4 : 2); i++) 5175 aarch64_set_vec_s32 (cpu, vd, i, 5176 aarch64_get_vec_s32 (cpu, vn, i) 5177 > aarch64_get_vec_s32 (cpu, vm, i) 5178 ? 
aarch64_get_vec_s32 (cpu, vn, i) 5179 : aarch64_get_vec_s32 (cpu, vm, i)); 5180 return; 5181 5182 case 3: 5183 HALT_UNALLOC; 5184 } 5185 } 5186 } 5187 5188 static void 5189 do_vec_min (sim_cpu *cpu) 5190 { 5191 /* instr[31] = 0 5192 instr[30] = full/half selector 5193 instr[29] = SMIN (0) / UMIN (1) 5194 instr[28,24] = 0 1110 5195 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit 5196 instr[21] = 1 5197 instr[20,16] = Vn 5198 instr[15,10] = 0110 11 5199 instr[9,5] = Vm 5200 instr[4.0] = Vd. */ 5201 5202 unsigned vm = INSTR (20, 16); 5203 unsigned vn = INSTR (9, 5); 5204 unsigned vd = INSTR (4, 0); 5205 unsigned i; 5206 int full = INSTR (30, 30); 5207 5208 NYI_assert (28, 24, 0x0E); 5209 NYI_assert (21, 21, 1); 5210 NYI_assert (15, 10, 0x1B); 5211 5212 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5213 if (INSTR (29, 29)) 5214 { 5215 switch (INSTR (23, 22)) 5216 { 5217 case 0: 5218 for (i = 0; i < (full ? 16 : 8); i++) 5219 aarch64_set_vec_u8 (cpu, vd, i, 5220 aarch64_get_vec_u8 (cpu, vn, i) 5221 < aarch64_get_vec_u8 (cpu, vm, i) 5222 ? aarch64_get_vec_u8 (cpu, vn, i) 5223 : aarch64_get_vec_u8 (cpu, vm, i)); 5224 return; 5225 5226 case 1: 5227 for (i = 0; i < (full ? 8 : 4); i++) 5228 aarch64_set_vec_u16 (cpu, vd, i, 5229 aarch64_get_vec_u16 (cpu, vn, i) 5230 < aarch64_get_vec_u16 (cpu, vm, i) 5231 ? aarch64_get_vec_u16 (cpu, vn, i) 5232 : aarch64_get_vec_u16 (cpu, vm, i)); 5233 return; 5234 5235 case 2: 5236 for (i = 0; i < (full ? 4 : 2); i++) 5237 aarch64_set_vec_u32 (cpu, vd, i, 5238 aarch64_get_vec_u32 (cpu, vn, i) 5239 < aarch64_get_vec_u32 (cpu, vm, i) 5240 ? aarch64_get_vec_u32 (cpu, vn, i) 5241 : aarch64_get_vec_u32 (cpu, vm, i)); 5242 return; 5243 5244 case 3: 5245 HALT_UNALLOC; 5246 } 5247 } 5248 else 5249 { 5250 switch (INSTR (23, 22)) 5251 { 5252 case 0: 5253 for (i = 0; i < (full ? 16 : 8); i++) 5254 aarch64_set_vec_s8 (cpu, vd, i, 5255 aarch64_get_vec_s8 (cpu, vn, i) 5256 < aarch64_get_vec_s8 (cpu, vm, i) 5257 ? aarch64_get_vec_s8 (cpu, vn, i) 5258 : aarch64_get_vec_s8 (cpu, vm, i)); 5259 return; 5260 5261 case 1: 5262 for (i = 0; i < (full ? 8 : 4); i++) 5263 aarch64_set_vec_s16 (cpu, vd, i, 5264 aarch64_get_vec_s16 (cpu, vn, i) 5265 < aarch64_get_vec_s16 (cpu, vm, i) 5266 ? aarch64_get_vec_s16 (cpu, vn, i) 5267 : aarch64_get_vec_s16 (cpu, vm, i)); 5268 return; 5269 5270 case 2: 5271 for (i = 0; i < (full ? 4 : 2); i++) 5272 aarch64_set_vec_s32 (cpu, vd, i, 5273 aarch64_get_vec_s32 (cpu, vn, i) 5274 < aarch64_get_vec_s32 (cpu, vm, i) 5275 ? aarch64_get_vec_s32 (cpu, vn, i) 5276 : aarch64_get_vec_s32 (cpu, vm, i)); 5277 return; 5278 5279 case 3: 5280 HALT_UNALLOC; 5281 } 5282 } 5283 } 5284 5285 static void 5286 do_vec_sub_long (sim_cpu *cpu) 5287 { 5288 /* instr[31] = 0 5289 instr[30] = lower (0) / upper (1) 5290 instr[29] = signed (0) / unsigned (1) 5291 instr[28,24] = 0 1110 5292 instr[23,22] = size: bytes (00), half (01), word (10) 5293 instr[21] = 1 5294 insrt[20,16] = Vm 5295 instr[15,10] = 0010 00 5296 instr[9,5] = Vn 5297 instr[4,0] = V dest. */ 5298 5299 unsigned size = INSTR (23, 22); 5300 unsigned vm = INSTR (20, 16); 5301 unsigned vn = INSTR (9, 5); 5302 unsigned vd = INSTR (4, 0); 5303 unsigned bias = 0; 5304 unsigned i; 5305 5306 NYI_assert (28, 24, 0x0E); 5307 NYI_assert (21, 21, 1); 5308 NYI_assert (15, 10, 0x08); 5309 5310 if (size == 3) 5311 HALT_UNALLOC; 5312 5313 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5314 switch (INSTR (30, 29)) 5315 { 5316 case 2: /* SSUBL2. */ 5317 bias = 2; 5318 case 0: /* SSUBL. 
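 Reached by fall-through from SSUBL2
		 above with bias = 2; the bias is scaled below to index
		 the upper half of the sources.  Widening subtract
		 cannot wrap: e.g. for bytes, (-128) - 127 = -255
		 still fits in the 16-bit result lane.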
 */
      switch (size)
	{
	case 0:
	  /* The upper-half ("2") forms need a byte bias of 8 (2 * 4).  */
	  bias *= 4;
	  for (i = 0; i < 8; i++)
	    aarch64_set_vec_s16 (cpu, vd, i,
				 aarch64_get_vec_s8 (cpu, vn, i + bias)
				 - aarch64_get_vec_s8 (cpu, vm, i + bias));
	  break;

	case 1:
	  bias *= 2;
	  for (i = 0; i < 4; i++)
	    aarch64_set_vec_s32 (cpu, vd, i,
				 aarch64_get_vec_s16 (cpu, vn, i + bias)
				 - aarch64_get_vec_s16 (cpu, vm, i + bias));
	  break;

	case 2:
	  for (i = 0; i < 2; i++)
	    aarch64_set_vec_s64 (cpu, vd, i,
				 aarch64_get_vec_s32 (cpu, vn, i + bias)
				 - aarch64_get_vec_s32 (cpu, vm, i + bias));
	  break;

	default:
	  HALT_UNALLOC;
	}
      break;

    case 3: /* USUBL2.  */
      bias = 2;
      /* Fall through.  */
    case 1: /* USUBL.  */
      switch (size)
	{
	case 0:
	  /* As above: bias 2 scaled to 8 bytes for the "2" form.  */
	  bias *= 4;
	  for (i = 0; i < 8; i++)
	    aarch64_set_vec_u16 (cpu, vd, i,
				 aarch64_get_vec_u8 (cpu, vn, i + bias)
				 - aarch64_get_vec_u8 (cpu, vm, i + bias));
	  break;

	case 1:
	  bias *= 2;
	  for (i = 0; i < 4; i++)
	    aarch64_set_vec_u32 (cpu, vd, i,
				 aarch64_get_vec_u16 (cpu, vn, i + bias)
				 - aarch64_get_vec_u16 (cpu, vm, i + bias));
	  break;

	case 2:
	  for (i = 0; i < 2; i++)
	    aarch64_set_vec_u64 (cpu, vd, i,
				 aarch64_get_vec_u32 (cpu, vn, i + bias)
				 - aarch64_get_vec_u32 (cpu, vm, i + bias));
	  break;

	default:
	  HALT_UNALLOC;
	}
      break;
    }
}

static void
do_vec_ADDP (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = half(0)/full(1)
     instr[29,24] = 00 1110
     instr[23,22] = size: bytes (00), half (01), word (10), long (11)
     instr[21]    = 1
     instr[20,16] = Vm
     instr[15,10] = 1011 11
     instr[9,5]   = Vn
     instr[4,0]   = V dest.  */

  FRegister copy_vn;
  FRegister copy_vm;
  unsigned full = INSTR (30, 30);
  unsigned size = INSTR (23, 22);
  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);
  unsigned i, range;

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 1);
  NYI_assert (15, 10, 0x2F);

  /* Make copies of the source registers in case vd == vn/vm.  */
  copy_vn = cpu->fr[vn];
  copy_vm = cpu->fr[vm];

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      range = full ? 8 : 4;
      for (i = 0; i < range; i++)
	{
	  aarch64_set_vec_u8 (cpu, vd, i,
			      copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
	  aarch64_set_vec_u8 (cpu, vd, i + range,
			      copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
	}
      return;

    case 1:
      range = full ? 4 : 2;
      for (i = 0; i < range; i++)
	{
	  aarch64_set_vec_u16 (cpu, vd, i,
			       copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
	  aarch64_set_vec_u16 (cpu, vd, i + range,
			       copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
	}
      return;

    case 2:
      range = full ? 2 : 1;
      for (i = 0; i < range; i++)
	{
	  aarch64_set_vec_u32 (cpu, vd, i,
			       copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
	  aarch64_set_vec_u32 (cpu, vd, i + range,
			       copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
	}
      return;

    case 3:
      if (! full)
	HALT_UNALLOC;
      aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
      aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
      return;
    }
}

/* Floating-point vector convert to longer (higher precision).
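   FCVTL widens the low half of the source and FCVTL2 the high half:
   e.g. two floats become two doubles.  The half-float to float form
   exists architecturally but is not implemented below.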
*/ 5460 static void 5461 do_vec_FCVTL (sim_cpu *cpu) 5462 { 5463 /* instr[31] = 0 5464 instr[30] = half (0) / all (1) 5465 instr[29,23] = 00 1110 0 5466 instr[22] = single (0) / double (1) 5467 instr[21,10] = 10 0001 0111 10 5468 instr[9,5] = Rn 5469 instr[4,0] = Rd. */ 5470 5471 unsigned rn = INSTR (9, 5); 5472 unsigned rd = INSTR (4, 0); 5473 unsigned full = INSTR (30, 30); 5474 unsigned i; 5475 5476 NYI_assert (31, 31, 0); 5477 NYI_assert (29, 23, 0x1C); 5478 NYI_assert (21, 10, 0x85E); 5479 5480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5481 if (INSTR (22, 22)) 5482 { 5483 for (i = 0; i < 2; i++) 5484 aarch64_set_vec_double (cpu, rd, i, 5485 aarch64_get_vec_float (cpu, rn, i + 2*full)); 5486 } 5487 else 5488 { 5489 HALT_NYI; 5490 5491 #if 0 5492 /* TODO: Implement missing half-float support. */ 5493 for (i = 0; i < 4; i++) 5494 aarch64_set_vec_float (cpu, rd, i, 5495 aarch64_get_vec_halffloat (cpu, rn, i + 4*full)); 5496 #endif 5497 } 5498 } 5499 5500 static void 5501 do_vec_FABS (sim_cpu *cpu) 5502 { 5503 /* instr[31] = 0 5504 instr[30] = half(0)/full(1) 5505 instr[29,23] = 00 1110 1 5506 instr[22] = float(0)/double(1) 5507 instr[21,16] = 10 0000 5508 instr[15,10] = 1111 10 5509 instr[9,5] = Vn 5510 instr[4,0] = Vd. */ 5511 5512 unsigned vn = INSTR (9, 5); 5513 unsigned vd = INSTR (4, 0); 5514 unsigned full = INSTR (30, 30); 5515 unsigned i; 5516 5517 NYI_assert (29, 23, 0x1D); 5518 NYI_assert (21, 10, 0x83E); 5519 5520 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5521 if (INSTR (22, 22)) 5522 { 5523 if (! full) 5524 HALT_NYI; 5525 5526 for (i = 0; i < 2; i++) 5527 aarch64_set_vec_double (cpu, vd, i, 5528 fabs (aarch64_get_vec_double (cpu, vn, i))); 5529 } 5530 else 5531 { 5532 for (i = 0; i < (full ? 4 : 2); i++) 5533 aarch64_set_vec_float (cpu, vd, i, 5534 fabsf (aarch64_get_vec_float (cpu, vn, i))); 5535 } 5536 } 5537 5538 static void 5539 do_vec_FCVTZS (sim_cpu *cpu) 5540 { 5541 /* instr[31] = 0 5542 instr[30] = half (0) / all (1) 5543 instr[29,23] = 00 1110 1 5544 instr[22] = single (0) / double (1) 5545 instr[21,10] = 10 0001 1011 10 5546 instr[9,5] = Rn 5547 instr[4,0] = Rd. */ 5548 5549 unsigned rn = INSTR (9, 5); 5550 unsigned rd = INSTR (4, 0); 5551 unsigned full = INSTR (30, 30); 5552 unsigned i; 5553 5554 NYI_assert (31, 31, 0); 5555 NYI_assert (29, 23, 0x1D); 5556 NYI_assert (21, 10, 0x86E); 5557 5558 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5559 if (INSTR (22, 22)) 5560 { 5561 if (! full) 5562 HALT_UNALLOC; 5563 5564 for (i = 0; i < 2; i++) 5565 aarch64_set_vec_s64 (cpu, rd, i, 5566 (int64_t) aarch64_get_vec_double (cpu, rn, i)); 5567 } 5568 else 5569 for (i = 0; i < (full ? 4 : 2); i++) 5570 aarch64_set_vec_s32 (cpu, rd, i, 5571 (int32_t) aarch64_get_vec_float (cpu, rn, i)); 5572 } 5573 5574 static void 5575 do_vec_REV64 (sim_cpu *cpu) 5576 { 5577 /* instr[31] = 0 5578 instr[30] = full/half 5579 instr[29,24] = 00 1110 5580 instr[23,22] = size 5581 instr[21,10] = 10 0000 0000 10 5582 instr[9,5] = Rn 5583 instr[4,0] = Rd. */ 5584 5585 unsigned rn = INSTR (9, 5); 5586 unsigned rd = INSTR (4, 0); 5587 unsigned size = INSTR (23, 22); 5588 unsigned full = INSTR (30, 30); 5589 unsigned i; 5590 FRegister val; 5591 5592 NYI_assert (29, 24, 0x0E); 5593 NYI_assert (21, 10, 0x802); 5594 5595 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5596 switch (size) 5597 { 5598 case 0: 5599 for (i = 0; i < (full ? 16 : 8); i++) 5600 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i); 5601 break; 5602 5603 case 1: 5604 for (i = 0; i < (full ? 
8 : 4); i++) 5605 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i); 5606 break; 5607 5608 case 2: 5609 for (i = 0; i < (full ? 4 : 2); i++) 5610 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i); 5611 break; 5612 5613 case 3: 5614 HALT_UNALLOC; 5615 } 5616 5617 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); 5618 if (full) 5619 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); 5620 } 5621 5622 static void 5623 do_vec_REV16 (sim_cpu *cpu) 5624 { 5625 /* instr[31] = 0 5626 instr[30] = full/half 5627 instr[29,24] = 00 1110 5628 instr[23,22] = size 5629 instr[21,10] = 10 0000 0001 10 5630 instr[9,5] = Rn 5631 instr[4,0] = Rd. */ 5632 5633 unsigned rn = INSTR (9, 5); 5634 unsigned rd = INSTR (4, 0); 5635 unsigned size = INSTR (23, 22); 5636 unsigned full = INSTR (30, 30); 5637 unsigned i; 5638 FRegister val; 5639 5640 NYI_assert (29, 24, 0x0E); 5641 NYI_assert (21, 10, 0x806); 5642 5643 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5644 switch (size) 5645 { 5646 case 0: 5647 for (i = 0; i < (full ? 16 : 8); i++) 5648 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i); 5649 break; 5650 5651 default: 5652 HALT_UNALLOC; 5653 } 5654 5655 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); 5656 if (full) 5657 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); 5658 } 5659 5660 static void 5661 do_vec_op1 (sim_cpu *cpu) 5662 { 5663 /* instr[31] = 0 5664 instr[30] = half/full 5665 instr[29,24] = 00 1110 5666 instr[23,21] = ??? 5667 instr[20,16] = Vm 5668 instr[15,10] = sub-opcode 5669 instr[9,5] = Vn 5670 instr[4,0] = Vd */ 5671 NYI_assert (29, 24, 0x0E); 5672 5673 if (INSTR (21, 21) == 0) 5674 { 5675 if (INSTR (23, 22) == 0) 5676 { 5677 if (INSTR (30, 30) == 1 5678 && INSTR (17, 14) == 0 5679 && INSTR (12, 10) == 7) 5680 return do_vec_ins_2 (cpu); 5681 5682 switch (INSTR (15, 10)) 5683 { 5684 case 0x01: do_vec_DUP_vector_into_vector (cpu); return; 5685 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return; 5686 case 0x07: do_vec_INS (cpu); return; 5687 case 0x0B: do_vec_SMOV_into_scalar (cpu); return; 5688 case 0x0F: do_vec_UMOV_into_scalar (cpu); return; 5689 5690 case 0x00: 5691 case 0x08: 5692 case 0x10: 5693 case 0x18: 5694 do_vec_TBL (cpu); return; 5695 5696 case 0x06: 5697 case 0x16: 5698 do_vec_UZP (cpu); return; 5699 5700 case 0x0A: do_vec_TRN (cpu); return; 5701 5702 case 0x0E: 5703 case 0x1E: 5704 do_vec_ZIP (cpu); return; 5705 5706 default: 5707 HALT_NYI; 5708 } 5709 } 5710 5711 switch (INSTR (13, 10)) 5712 { 5713 case 0x6: do_vec_UZP (cpu); return; 5714 case 0xE: do_vec_ZIP (cpu); return; 5715 case 0xA: do_vec_TRN (cpu); return; 5716 default: HALT_NYI; 5717 } 5718 } 5719 5720 switch (INSTR (15, 10)) 5721 { 5722 case 0x02: do_vec_REV64 (cpu); return; 5723 case 0x06: do_vec_REV16 (cpu); return; 5724 5725 case 0x07: 5726 switch (INSTR (23, 21)) 5727 { 5728 case 1: do_vec_AND (cpu); return; 5729 case 3: do_vec_BIC (cpu); return; 5730 case 5: do_vec_ORR (cpu); return; 5731 case 7: do_vec_ORN (cpu); return; 5732 default: HALT_NYI; 5733 } 5734 5735 case 0x08: do_vec_sub_long (cpu); return; 5736 case 0x0a: do_vec_XTN (cpu); return; 5737 case 0x11: do_vec_SSHL (cpu); return; 5738 case 0x16: do_vec_CNT (cpu); return; 5739 case 0x19: do_vec_max (cpu); return; 5740 case 0x1B: do_vec_min (cpu); return; 5741 case 0x21: do_vec_add (cpu); return; 5742 case 0x25: do_vec_MLA (cpu); return; 5743 case 0x27: do_vec_mul (cpu); return; 5744 case 0x2F: do_vec_ADDP (cpu); return; 5745 case 0x30: do_vec_mull (cpu); return; 5746 case 0x33: do_vec_FMLA (cpu); return; 5747 case 0x35: do_vec_fadd (cpu); return; 5748 5749 case 0x1E: 5750 
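      /* Sub-opcode 0x1E needs the Vm field to disambiguate: only
	 Vm == 1 (FCVTL) is handled here.  */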
switch (INSTR (20, 16)) 5751 { 5752 case 0x01: do_vec_FCVTL (cpu); return; 5753 default: HALT_NYI; 5754 } 5755 5756 case 0x2E: 5757 switch (INSTR (20, 16)) 5758 { 5759 case 0x00: do_vec_ABS (cpu); return; 5760 case 0x01: do_vec_FCVTZS (cpu); return; 5761 case 0x11: do_vec_ADDV (cpu); return; 5762 default: HALT_NYI; 5763 } 5764 5765 case 0x31: 5766 case 0x3B: 5767 do_vec_Fminmax (cpu); return; 5768 5769 case 0x0D: 5770 case 0x0F: 5771 case 0x22: 5772 case 0x23: 5773 case 0x26: 5774 case 0x2A: 5775 case 0x32: 5776 case 0x36: 5777 case 0x39: 5778 case 0x3A: 5779 do_vec_compare (cpu); return; 5780 5781 case 0x3E: 5782 do_vec_FABS (cpu); return; 5783 5784 default: 5785 HALT_NYI; 5786 } 5787 } 5788 5789 static void 5790 do_vec_xtl (sim_cpu *cpu) 5791 { 5792 /* instr[31] = 0 5793 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11) 5794 instr[28,22] = 0 1111 00 5795 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2) 5796 instr[15,10] = 1010 01 5797 instr[9,5] = V source 5798 instr[4,0] = V dest. */ 5799 5800 unsigned vs = INSTR (9, 5); 5801 unsigned vd = INSTR (4, 0); 5802 unsigned i, shift, bias = 0; 5803 5804 NYI_assert (28, 22, 0x3C); 5805 NYI_assert (15, 10, 0x29); 5806 5807 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5808 switch (INSTR (30, 29)) 5809 { 5810 case 2: /* SXTL2, SSHLL2. */ 5811 bias = 2; 5812 case 0: /* SXTL, SSHLL. */ 5813 if (INSTR (21, 21)) 5814 { 5815 int64_t val1, val2; 5816 5817 shift = INSTR (20, 16); 5818 /* Get the source values before setting the destination values 5819 in case the source and destination are the same. */ 5820 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift; 5821 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift; 5822 aarch64_set_vec_s64 (cpu, vd, 0, val1); 5823 aarch64_set_vec_s64 (cpu, vd, 1, val2); 5824 } 5825 else if (INSTR (20, 20)) 5826 { 5827 int32_t v[4]; 5828 int32_t v1,v2,v3,v4; 5829 5830 shift = INSTR (19, 16); 5831 bias *= 2; 5832 for (i = 0; i < 4; i++) 5833 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift; 5834 for (i = 0; i < 4; i++) 5835 aarch64_set_vec_s32 (cpu, vd, i, v[i]); 5836 } 5837 else 5838 { 5839 int16_t v[8]; 5840 NYI_assert (19, 19, 1); 5841 5842 shift = INSTR (18, 16); 5843 bias *= 4; 5844 for (i = 0; i < 8; i++) 5845 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift; 5846 for (i = 0; i < 8; i++) 5847 aarch64_set_vec_s16 (cpu, vd, i, v[i]); 5848 } 5849 return; 5850 5851 case 3: /* UXTL2, USHLL2. */ 5852 bias = 2; 5853 case 1: /* UXTL, USHLL. 
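 The position of the leading one in
	 bits 21..19 selects the element size and the bits below it
	 give the left-shift amount, so UXTL is just USHLL with a
	 shift of zero; e.g. bit 21 set widens two words to two
	 doublewords, shifted left by INSTR (20, 16).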
*/ 5854 if (INSTR (21, 21)) 5855 { 5856 uint64_t v1, v2; 5857 shift = INSTR (20, 16); 5858 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift; 5859 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift; 5860 aarch64_set_vec_u64 (cpu, vd, 0, v1); 5861 aarch64_set_vec_u64 (cpu, vd, 1, v2); 5862 } 5863 else if (INSTR (20, 20)) 5864 { 5865 uint32_t v[4]; 5866 shift = INSTR (19, 16); 5867 bias *= 2; 5868 for (i = 0; i < 4; i++) 5869 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift; 5870 for (i = 0; i < 4; i++) 5871 aarch64_set_vec_u32 (cpu, vd, i, v[i]); 5872 } 5873 else 5874 { 5875 uint16_t v[8]; 5876 NYI_assert (19, 19, 1); 5877 5878 shift = INSTR (18, 16); 5879 bias *= 4; 5880 for (i = 0; i < 8; i++) 5881 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift; 5882 for (i = 0; i < 8; i++) 5883 aarch64_set_vec_u16 (cpu, vd, i, v[i]); 5884 } 5885 return; 5886 } 5887 } 5888 5889 static void 5890 do_vec_SHL (sim_cpu *cpu) 5891 { 5892 /* instr [31] = 0 5893 instr [30] = half(0)/full(1) 5894 instr [29,23] = 001 1110 5895 instr [22,16] = size and shift amount 5896 instr [15,10] = 01 0101 5897 instr [9, 5] = Vs 5898 instr [4, 0] = Vd. */ 5899 5900 int shift; 5901 int full = INSTR (30, 30); 5902 unsigned vs = INSTR (9, 5); 5903 unsigned vd = INSTR (4, 0); 5904 unsigned i; 5905 5906 NYI_assert (29, 23, 0x1E); 5907 NYI_assert (15, 10, 0x15); 5908 5909 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5910 if (INSTR (22, 22)) 5911 { 5912 shift = INSTR (21, 16); 5913 5914 if (full == 0) 5915 HALT_UNALLOC; 5916 5917 for (i = 0; i < 2; i++) 5918 { 5919 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i); 5920 aarch64_set_vec_u64 (cpu, vd, i, val << shift); 5921 } 5922 5923 return; 5924 } 5925 5926 if (INSTR (21, 21)) 5927 { 5928 shift = INSTR (20, 16); 5929 5930 for (i = 0; i < (full ? 4 : 2); i++) 5931 { 5932 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i); 5933 aarch64_set_vec_u32 (cpu, vd, i, val << shift); 5934 } 5935 5936 return; 5937 } 5938 5939 if (INSTR (20, 20)) 5940 { 5941 shift = INSTR (19, 16); 5942 5943 for (i = 0; i < (full ? 8 : 4); i++) 5944 { 5945 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i); 5946 aarch64_set_vec_u16 (cpu, vd, i, val << shift); 5947 } 5948 5949 return; 5950 } 5951 5952 if (INSTR (19, 19) == 0) 5953 HALT_UNALLOC; 5954 5955 shift = INSTR (18, 16); 5956 5957 for (i = 0; i < (full ? 16 : 8); i++) 5958 { 5959 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i); 5960 aarch64_set_vec_u8 (cpu, vd, i, val << shift); 5961 } 5962 } 5963 5964 static void 5965 do_vec_SSHR_USHR (sim_cpu *cpu) 5966 { 5967 /* instr [31] = 0 5968 instr [30] = half(0)/full(1) 5969 instr [29] = signed(0)/unsigned(1) 5970 instr [28,23] = 0 1111 0 5971 instr [22,16] = size and shift amount 5972 instr [15,10] = 0000 01 5973 instr [9, 5] = Vs 5974 instr [4, 0] = Vd. */ 5975 5976 int full = INSTR (30, 30); 5977 int sign = ! 
INSTR (29, 29); 5978 unsigned shift = INSTR (22, 16); 5979 unsigned vs = INSTR (9, 5); 5980 unsigned vd = INSTR (4, 0); 5981 unsigned i; 5982 5983 NYI_assert (28, 23, 0x1E); 5984 NYI_assert (15, 10, 0x01); 5985 5986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 5987 if (INSTR (22, 22)) 5988 { 5989 shift = 128 - shift; 5990 5991 if (full == 0) 5992 HALT_UNALLOC; 5993 5994 if (sign) 5995 for (i = 0; i < 2; i++) 5996 { 5997 int64_t val = aarch64_get_vec_s64 (cpu, vs, i); 5998 aarch64_set_vec_s64 (cpu, vd, i, val >> shift); 5999 } 6000 else 6001 for (i = 0; i < 2; i++) 6002 { 6003 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i); 6004 aarch64_set_vec_u64 (cpu, vd, i, val >> shift); 6005 } 6006 6007 return; 6008 } 6009 6010 if (INSTR (21, 21)) 6011 { 6012 shift = 64 - shift; 6013 6014 if (sign) 6015 for (i = 0; i < (full ? 4 : 2); i++) 6016 { 6017 int32_t val = aarch64_get_vec_s32 (cpu, vs, i); 6018 aarch64_set_vec_s32 (cpu, vd, i, val >> shift); 6019 } 6020 else 6021 for (i = 0; i < (full ? 4 : 2); i++) 6022 { 6023 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i); 6024 aarch64_set_vec_u32 (cpu, vd, i, val >> shift); 6025 } 6026 6027 return; 6028 } 6029 6030 if (INSTR (20, 20)) 6031 { 6032 shift = 32 - shift; 6033 6034 if (sign) 6035 for (i = 0; i < (full ? 8 : 4); i++) 6036 { 6037 int16_t val = aarch64_get_vec_s16 (cpu, vs, i); 6038 aarch64_set_vec_s16 (cpu, vd, i, val >> shift); 6039 } 6040 else 6041 for (i = 0; i < (full ? 8 : 4); i++) 6042 { 6043 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i); 6044 aarch64_set_vec_u16 (cpu, vd, i, val >> shift); 6045 } 6046 6047 return; 6048 } 6049 6050 if (INSTR (19, 19) == 0) 6051 HALT_UNALLOC; 6052 6053 shift = 16 - shift; 6054 6055 if (sign) 6056 for (i = 0; i < (full ? 16 : 8); i++) 6057 { 6058 int8_t val = aarch64_get_vec_s8 (cpu, vs, i); 6059 aarch64_set_vec_s8 (cpu, vd, i, val >> shift); 6060 } 6061 else 6062 for (i = 0; i < (full ? 16 : 8); i++) 6063 { 6064 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i); 6065 aarch64_set_vec_u8 (cpu, vd, i, val >> shift); 6066 } 6067 } 6068 6069 static void 6070 do_vec_MUL_by_element (sim_cpu *cpu) 6071 { 6072 /* instr[31] = 0 6073 instr[30] = half/full 6074 instr[29,24] = 00 1111 6075 instr[23,22] = size 6076 instr[21] = L 6077 instr[20] = M 6078 instr[19,16] = m 6079 instr[15,12] = 1000 6080 instr[11] = H 6081 instr[10] = 0 6082 instr[9,5] = Vn 6083 instr[4,0] = Vd */ 6084 6085 unsigned full = INSTR (30, 30); 6086 unsigned L = INSTR (21, 21); 6087 unsigned H = INSTR (11, 11); 6088 unsigned vn = INSTR (9, 5); 6089 unsigned vd = INSTR (4, 0); 6090 unsigned size = INSTR (23, 22); 6091 unsigned index; 6092 unsigned vm; 6093 unsigned e; 6094 6095 NYI_assert (29, 24, 0x0F); 6096 NYI_assert (15, 12, 0x8); 6097 NYI_assert (10, 10, 0); 6098 6099 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6100 switch (size) 6101 { 6102 case 1: 6103 { 6104 /* 16 bit products. */ 6105 uint16_t product; 6106 uint16_t element1; 6107 uint16_t element2; 6108 6109 index = (H << 2) | (L << 1) | INSTR (20, 20); 6110 vm = INSTR (19, 16); 6111 element2 = aarch64_get_vec_u16 (cpu, vm, index); 6112 6113 for (e = 0; e < (full ? 8 : 4); e ++) 6114 { 6115 element1 = aarch64_get_vec_u16 (cpu, vn, e); 6116 product = element1 * element2; 6117 aarch64_set_vec_u16 (cpu, vd, e, product); 6118 } 6119 } 6120 break; 6121 6122 case 2: 6123 { 6124 /* 32 bit products. 
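  For word lanes the scalar index is
	     H:L (lanes 0-3) and Vm is the full 5-bit field; e.g.
	     H = 1, L = 0 selects lane 2 of Vm as the multiplier.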
*/ 6125 uint32_t product; 6126 uint32_t element1; 6127 uint32_t element2; 6128 6129 index = (H << 1) | L; 6130 vm = INSTR (20, 16); 6131 element2 = aarch64_get_vec_u32 (cpu, vm, index); 6132 6133 for (e = 0; e < (full ? 4 : 2); e ++) 6134 { 6135 element1 = aarch64_get_vec_u32 (cpu, vn, e); 6136 product = element1 * element2; 6137 aarch64_set_vec_u32 (cpu, vd, e, product); 6138 } 6139 } 6140 break; 6141 6142 default: 6143 HALT_UNALLOC; 6144 } 6145 } 6146 6147 static void 6148 do_FMLA_by_element (sim_cpu *cpu) 6149 { 6150 /* instr[31] = 0 6151 instr[30] = half/full 6152 instr[29,23] = 00 1111 1 6153 instr[22] = size 6154 instr[21] = L 6155 instr[20,16] = m 6156 instr[15,12] = 0001 6157 instr[11] = H 6158 instr[10] = 0 6159 instr[9,5] = Vn 6160 instr[4,0] = Vd */ 6161 6162 unsigned full = INSTR (30, 30); 6163 unsigned size = INSTR (22, 22); 6164 unsigned L = INSTR (21, 21); 6165 unsigned vm = INSTR (20, 16); 6166 unsigned H = INSTR (11, 11); 6167 unsigned vn = INSTR (9, 5); 6168 unsigned vd = INSTR (4, 0); 6169 unsigned e; 6170 6171 NYI_assert (29, 23, 0x1F); 6172 NYI_assert (15, 12, 0x1); 6173 NYI_assert (10, 10, 0); 6174 6175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6176 if (size) 6177 { 6178 double element1, element2; 6179 6180 if (! full || L) 6181 HALT_UNALLOC; 6182 6183 element2 = aarch64_get_vec_double (cpu, vm, H); 6184 6185 for (e = 0; e < 2; e++) 6186 { 6187 element1 = aarch64_get_vec_double (cpu, vn, e); 6188 element1 *= element2; 6189 element1 += aarch64_get_vec_double (cpu, vd, e); 6190 aarch64_set_vec_double (cpu, vd, e, element1); 6191 } 6192 } 6193 else 6194 { 6195 float element1; 6196 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L); 6197 6198 for (e = 0; e < (full ? 4 : 2); e++) 6199 { 6200 element1 = aarch64_get_vec_float (cpu, vn, e); 6201 element1 *= element2; 6202 element1 += aarch64_get_vec_float (cpu, vd, e); 6203 aarch64_set_vec_float (cpu, vd, e, element1); 6204 } 6205 } 6206 } 6207 6208 static void 6209 do_vec_op2 (sim_cpu *cpu) 6210 { 6211 /* instr[31] = 0 6212 instr[30] = half/full 6213 instr[29,24] = 00 1111 6214 instr[23] = ? 6215 instr[22,16] = element size & index 6216 instr[15,10] = sub-opcode 6217 instr[9,5] = Vm 6218 instr[4,0] = Vd */ 6219 6220 NYI_assert (29, 24, 0x0F); 6221 6222 if (INSTR (23, 23) != 0) 6223 { 6224 switch (INSTR (15, 10)) 6225 { 6226 case 0x04: 6227 case 0x06: 6228 do_FMLA_by_element (cpu); 6229 return; 6230 6231 case 0x20: 6232 case 0x22: 6233 do_vec_MUL_by_element (cpu); 6234 return; 6235 6236 default: 6237 HALT_NYI; 6238 } 6239 } 6240 else 6241 { 6242 switch (INSTR (15, 10)) 6243 { 6244 case 0x01: do_vec_SSHR_USHR (cpu); return; 6245 case 0x15: do_vec_SHL (cpu); return; 6246 case 0x20: 6247 case 0x22: do_vec_MUL_by_element (cpu); return; 6248 case 0x29: do_vec_xtl (cpu); return; 6249 default: HALT_NYI; 6250 } 6251 } 6252 } 6253 6254 static void 6255 do_vec_neg (sim_cpu *cpu) 6256 { 6257 /* instr[31] = 0 6258 instr[30] = full(1)/half(0) 6259 instr[29,24] = 10 1110 6260 instr[23,22] = size: byte(00), half (01), word (10), long (11) 6261 instr[21,10] = 1000 0010 1110 6262 instr[9,5] = Vs 6263 instr[4,0] = Vd */ 6264 6265 int full = INSTR (30, 30); 6266 unsigned vs = INSTR (9, 5); 6267 unsigned vd = INSTR (4, 0); 6268 unsigned i; 6269 6270 NYI_assert (29, 24, 0x2E); 6271 NYI_assert (21, 10, 0x82E); 6272 6273 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6274 switch (INSTR (23, 22)) 6275 { 6276 case 0: 6277 for (i = 0; i < (full ? 
16 : 8); i++) 6278 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i)); 6279 return; 6280 6281 case 1: 6282 for (i = 0; i < (full ? 8 : 4); i++) 6283 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i)); 6284 return; 6285 6286 case 2: 6287 for (i = 0; i < (full ? 4 : 2); i++) 6288 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i)); 6289 return; 6290 6291 case 3: 6292 if (! full) 6293 HALT_NYI; 6294 for (i = 0; i < 2; i++) 6295 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i)); 6296 return; 6297 } 6298 } 6299 6300 static void 6301 do_vec_sqrt (sim_cpu *cpu) 6302 { 6303 /* instr[31] = 0 6304 instr[30] = full(1)/half(0) 6305 instr[29,23] = 101 1101 6306 instr[22] = single(0)/double(1) 6307 instr[21,10] = 1000 0111 1110 6308 instr[9,5] = Vs 6309 instr[4,0] = Vd. */ 6310 6311 int full = INSTR (30, 30); 6312 unsigned vs = INSTR (9, 5); 6313 unsigned vd = INSTR (4, 0); 6314 unsigned i; 6315 6316 NYI_assert (29, 23, 0x5B); 6317 NYI_assert (21, 10, 0x87E); 6318 6319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6320 if (INSTR (22, 22) == 0) 6321 for (i = 0; i < (full ? 4 : 2); i++) 6322 aarch64_set_vec_float (cpu, vd, i, 6323 sqrtf (aarch64_get_vec_float (cpu, vs, i))); 6324 else 6325 for (i = 0; i < 2; i++) 6326 aarch64_set_vec_double (cpu, vd, i, 6327 sqrt (aarch64_get_vec_double (cpu, vs, i))); 6328 } 6329 6330 static void 6331 do_vec_mls_indexed (sim_cpu *cpu) 6332 { 6333 /* instr[31] = 0 6334 instr[30] = half(0)/full(1) 6335 instr[29,24] = 10 1111 6336 instr[23,22] = 16-bit(01)/32-bit(10) 6337 instr[21,20+11] = index (if 16-bit) 6338 instr[21+11] = index (if 32-bit) 6339 instr[20,16] = Vm 6340 instr[15,12] = 0100 6341 instr[11] = part of index 6342 instr[10] = 0 6343 instr[9,5] = Vs 6344 instr[4,0] = Vd. */ 6345 6346 int full = INSTR (30, 30); 6347 unsigned vs = INSTR (9, 5); 6348 unsigned vd = INSTR (4, 0); 6349 unsigned vm = INSTR (20, 16); 6350 unsigned i; 6351 6352 NYI_assert (15, 12, 4); 6353 NYI_assert (10, 10, 0); 6354 6355 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6356 switch (INSTR (23, 22)) 6357 { 6358 case 1: 6359 { 6360 unsigned elem; 6361 uint32_t val; 6362 6363 if (vm > 15) 6364 HALT_NYI; 6365 6366 elem = (INSTR (21, 20) << 1) | INSTR (11, 11); 6367 val = aarch64_get_vec_u16 (cpu, vm, elem); 6368 6369 for (i = 0; i < (full ? 8 : 4); i++) 6370 aarch64_set_vec_u32 (cpu, vd, i, 6371 aarch64_get_vec_u32 (cpu, vd, i) - 6372 (aarch64_get_vec_u32 (cpu, vs, i) * val)); 6373 return; 6374 } 6375 6376 case 2: 6377 { 6378 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11); 6379 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem); 6380 6381 for (i = 0; i < (full ? 4 : 2); i++) 6382 aarch64_set_vec_u64 (cpu, vd, i, 6383 aarch64_get_vec_u64 (cpu, vd, i) - 6384 (aarch64_get_vec_u64 (cpu, vs, i) * val)); 6385 return; 6386 } 6387 6388 case 0: 6389 case 3: 6390 default: 6391 HALT_NYI; 6392 } 6393 } 6394 6395 static void 6396 do_vec_SUB (sim_cpu *cpu) 6397 { 6398 /* instr [31] = 0 6399 instr [30] = half(0)/full(1) 6400 instr [29,24] = 10 1110 6401 instr [23,22] = size: byte(00, half(01), word (10), long (11) 6402 instr [21] = 1 6403 instr [20,16] = Vm 6404 instr [15,10] = 10 0001 6405 instr [9, 5] = Vn 6406 instr [4, 0] = Vd. 
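
     Integer SUB wraps modulo the lane width, which the truncating
     lane stores below provide for free: e.g. for bytes,
     0x01 - 0x02 stores 0xFF (-1).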
*/ 6407 6408 unsigned full = INSTR (30, 30); 6409 unsigned vm = INSTR (20, 16); 6410 unsigned vn = INSTR (9, 5); 6411 unsigned vd = INSTR (4, 0); 6412 unsigned i; 6413 6414 NYI_assert (29, 24, 0x2E); 6415 NYI_assert (21, 21, 1); 6416 NYI_assert (15, 10, 0x21); 6417 6418 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6419 switch (INSTR (23, 22)) 6420 { 6421 case 0: 6422 for (i = 0; i < (full ? 16 : 8); i++) 6423 aarch64_set_vec_s8 (cpu, vd, i, 6424 aarch64_get_vec_s8 (cpu, vn, i) 6425 - aarch64_get_vec_s8 (cpu, vm, i)); 6426 return; 6427 6428 case 1: 6429 for (i = 0; i < (full ? 8 : 4); i++) 6430 aarch64_set_vec_s16 (cpu, vd, i, 6431 aarch64_get_vec_s16 (cpu, vn, i) 6432 - aarch64_get_vec_s16 (cpu, vm, i)); 6433 return; 6434 6435 case 2: 6436 for (i = 0; i < (full ? 4 : 2); i++) 6437 aarch64_set_vec_s32 (cpu, vd, i, 6438 aarch64_get_vec_s32 (cpu, vn, i) 6439 - aarch64_get_vec_s32 (cpu, vm, i)); 6440 return; 6441 6442 case 3: 6443 if (full == 0) 6444 HALT_UNALLOC; 6445 6446 for (i = 0; i < 2; i++) 6447 aarch64_set_vec_s64 (cpu, vd, i, 6448 aarch64_get_vec_s64 (cpu, vn, i) 6449 - aarch64_get_vec_s64 (cpu, vm, i)); 6450 return; 6451 } 6452 } 6453 6454 static void 6455 do_vec_MLS (sim_cpu *cpu) 6456 { 6457 /* instr [31] = 0 6458 instr [30] = half(0)/full(1) 6459 instr [29,24] = 10 1110 6460 instr [23,22] = size: byte(00, half(01), word (10) 6461 instr [21] = 1 6462 instr [20,16] = Vm 6463 instr [15,10] = 10 0101 6464 instr [9, 5] = Vn 6465 instr [4, 0] = Vd. */ 6466 6467 unsigned full = INSTR (30, 30); 6468 unsigned vm = INSTR (20, 16); 6469 unsigned vn = INSTR (9, 5); 6470 unsigned vd = INSTR (4, 0); 6471 unsigned i; 6472 6473 NYI_assert (29, 24, 0x2E); 6474 NYI_assert (21, 21, 1); 6475 NYI_assert (15, 10, 0x25); 6476 6477 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6478 switch (INSTR (23, 22)) 6479 { 6480 case 0: 6481 for (i = 0; i < (full ? 16 : 8); i++) 6482 aarch64_set_vec_u8 (cpu, vd, i, 6483 aarch64_get_vec_u8 (cpu, vd, i) 6484 - (aarch64_get_vec_u8 (cpu, vn, i) 6485 * aarch64_get_vec_u8 (cpu, vm, i))); 6486 return; 6487 6488 case 1: 6489 for (i = 0; i < (full ? 8 : 4); i++) 6490 aarch64_set_vec_u16 (cpu, vd, i, 6491 aarch64_get_vec_u16 (cpu, vd, i) 6492 - (aarch64_get_vec_u16 (cpu, vn, i) 6493 * aarch64_get_vec_u16 (cpu, vm, i))); 6494 return; 6495 6496 case 2: 6497 for (i = 0; i < (full ? 4 : 2); i++) 6498 aarch64_set_vec_u32 (cpu, vd, i, 6499 aarch64_get_vec_u32 (cpu, vd, i) 6500 - (aarch64_get_vec_u32 (cpu, vn, i) 6501 * aarch64_get_vec_u32 (cpu, vm, i))); 6502 return; 6503 6504 default: 6505 HALT_UNALLOC; 6506 } 6507 } 6508 6509 static void 6510 do_vec_FDIV (sim_cpu *cpu) 6511 { 6512 /* instr [31] = 0 6513 instr [30] = half(0)/full(1) 6514 instr [29,23] = 10 1110 0 6515 instr [22] = float()/double(1) 6516 instr [21] = 1 6517 instr [20,16] = Vm 6518 instr [15,10] = 1111 11 6519 instr [9, 5] = Vn 6520 instr [4, 0] = Vd. */ 6521 6522 unsigned full = INSTR (30, 30); 6523 unsigned vm = INSTR (20, 16); 6524 unsigned vn = INSTR (9, 5); 6525 unsigned vd = INSTR (4, 0); 6526 unsigned i; 6527 6528 NYI_assert (29, 23, 0x5C); 6529 NYI_assert (21, 21, 1); 6530 NYI_assert (15, 10, 0x3F); 6531 6532 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6533 if (INSTR (22, 22)) 6534 { 6535 if (! full) 6536 HALT_UNALLOC; 6537 6538 for (i = 0; i < 2; i++) 6539 aarch64_set_vec_double (cpu, vd, i, 6540 aarch64_get_vec_double (cpu, vn, i) 6541 / aarch64_get_vec_double (cpu, vm, i)); 6542 } 6543 else 6544 for (i = 0; i < (full ? 
4 : 2); i++)
6545 aarch64_set_vec_float (cpu, vd, i,
6546 aarch64_get_vec_float (cpu, vn, i)
6547 / aarch64_get_vec_float (cpu, vm, i));
6548 }
6549
6550 static void
6551 do_vec_FMUL (sim_cpu *cpu)
6552 {
6553 /* instr [31] = 0
6554 instr [30] = half(0)/full(1)
6555 instr [29,23] = 10 1110 0
6556 instr [22] = float(0)/double(1)
6557 instr [21] = 1
6558 instr [20,16] = Vm
6559 instr [15,10] = 1101 11
6560 instr [9, 5] = Vn
6561 instr [4, 0] = Vd. */
6562
6563 unsigned full = INSTR (30, 30);
6564 unsigned vm = INSTR (20, 16);
6565 unsigned vn = INSTR (9, 5);
6566 unsigned vd = INSTR (4, 0);
6567 unsigned i;
6568
6569 NYI_assert (29, 23, 0x5C);
6570 NYI_assert (21, 21, 1);
6571 NYI_assert (15, 10, 0x37);
6572
6573 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6574 if (INSTR (22, 22))
6575 {
6576 if (! full)
6577 HALT_UNALLOC;
6578
6579 for (i = 0; i < 2; i++)
6580 aarch64_set_vec_double (cpu, vd, i,
6581 aarch64_get_vec_double (cpu, vn, i)
6582 * aarch64_get_vec_double (cpu, vm, i));
6583 }
6584 else
6585 for (i = 0; i < (full ? 4 : 2); i++)
6586 aarch64_set_vec_float (cpu, vd, i,
6587 aarch64_get_vec_float (cpu, vn, i)
6588 * aarch64_get_vec_float (cpu, vm, i));
6589 }
6590
6591 static void
6592 do_vec_FADDP (sim_cpu *cpu)
6593 {
6594 /* instr [31] = 0
6595 instr [30] = half(0)/full(1)
6596 instr [29,23] = 10 1110 0
6597 instr [22] = float(0)/double(1)
6598 instr [21] = 1
6599 instr [20,16] = Vm
6600 instr [15,10] = 1101 01
6601 instr [9, 5] = Vn
6602 instr [4, 0] = Vd. */
6603
6604 unsigned full = INSTR (30, 30);
6605 unsigned vm = INSTR (20, 16);
6606 unsigned vn = INSTR (9, 5);
6607 unsigned vd = INSTR (4, 0);
6608
6609 NYI_assert (29, 23, 0x5C);
6610 NYI_assert (21, 21, 1);
6611 NYI_assert (15, 10, 0x35);
6612
6613 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6614 if (INSTR (22, 22))
6615 {
6616 /* Extract values before adding them in case vd == vn/vm. */
6617 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6618 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6619 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6620 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6621
6622 if (! full)
6623 HALT_UNALLOC;
6624
6625 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6626 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6627 }
6628 else
6629 {
6630 /* Extract values before adding them in case vd == vn/vm. */
6631 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6632 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6633 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6634 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6635
6636 if (full)
6637 {
6638 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6639 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6640 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6641 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6642
6643 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6644 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6645 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6646 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6647 }
6648 else
6649 {
6650 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6651 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6652 }
6653 }
6654 }
6655
6656 static void
6657 do_vec_FSQRT (sim_cpu *cpu)
6658 {
6659 /* instr[31] = 0
6660 instr[30] = half(0)/full(1)
6661 instr[29,23] = 10 1110 1
6662 instr[22] = single(0)/double(1)
6663 instr[21,10] = 10 0001 1111 10
6664 instr[9,5] = Vsrc
6665 instr[4,0] = Vdest.
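   N.B. as with the scalar FSQRT emulation later in this file, the
   vector form defers to the host's sqrtf/sqrt, so results follow
   the host libm's default round-to-nearest behaviour rather than
   the rounding mode selected in FPCR.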
*/ 6666 6667 unsigned vn = INSTR (9, 5); 6668 unsigned vd = INSTR (4, 0); 6669 unsigned full = INSTR (30, 30); 6670 int i; 6671 6672 NYI_assert (29, 23, 0x5D); 6673 NYI_assert (21, 10, 0x87E); 6674 6675 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6676 if (INSTR (22, 22)) 6677 { 6678 if (! full) 6679 HALT_UNALLOC; 6680 6681 for (i = 0; i < 2; i++) 6682 aarch64_set_vec_double (cpu, vd, i, 6683 sqrt (aarch64_get_vec_double (cpu, vn, i))); 6684 } 6685 else 6686 { 6687 for (i = 0; i < (full ? 4 : 2); i++) 6688 aarch64_set_vec_float (cpu, vd, i, 6689 sqrtf (aarch64_get_vec_float (cpu, vn, i))); 6690 } 6691 } 6692 6693 static void 6694 do_vec_FNEG (sim_cpu *cpu) 6695 { 6696 /* instr[31] = 0 6697 instr[30] = half (0)/full (1) 6698 instr[29,23] = 10 1110 1 6699 instr[22] = single (0)/double (1) 6700 instr[21,10] = 10 0000 1111 10 6701 instr[9,5] = Vsrc 6702 instr[4,0] = Vdest. */ 6703 6704 unsigned vn = INSTR (9, 5); 6705 unsigned vd = INSTR (4, 0); 6706 unsigned full = INSTR (30, 30); 6707 int i; 6708 6709 NYI_assert (29, 23, 0x5D); 6710 NYI_assert (21, 10, 0x83E); 6711 6712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6713 if (INSTR (22, 22)) 6714 { 6715 if (! full) 6716 HALT_UNALLOC; 6717 6718 for (i = 0; i < 2; i++) 6719 aarch64_set_vec_double (cpu, vd, i, 6720 - aarch64_get_vec_double (cpu, vn, i)); 6721 } 6722 else 6723 { 6724 for (i = 0; i < (full ? 4 : 2); i++) 6725 aarch64_set_vec_float (cpu, vd, i, 6726 - aarch64_get_vec_float (cpu, vn, i)); 6727 } 6728 } 6729 6730 static void 6731 do_vec_NOT (sim_cpu *cpu) 6732 { 6733 /* instr[31] = 0 6734 instr[30] = half (0)/full (1) 6735 instr[29,10] = 10 1110 0010 0000 0101 10 6736 instr[9,5] = Vn 6737 instr[4.0] = Vd. */ 6738 6739 unsigned vn = INSTR (9, 5); 6740 unsigned vd = INSTR (4, 0); 6741 unsigned i; 6742 int full = INSTR (30, 30); 6743 6744 NYI_assert (29, 10, 0xB8816); 6745 6746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6747 for (i = 0; i < (full ? 16 : 8); i++) 6748 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i)); 6749 } 6750 6751 static unsigned int 6752 clz (uint64_t val, unsigned size) 6753 { 6754 uint64_t mask = 1; 6755 int count; 6756 6757 mask <<= (size - 1); 6758 count = 0; 6759 do 6760 { 6761 if (val & mask) 6762 break; 6763 mask >>= 1; 6764 count ++; 6765 } 6766 while (mask); 6767 6768 return count; 6769 } 6770 6771 static void 6772 do_vec_CLZ (sim_cpu *cpu) 6773 { 6774 /* instr[31] = 0 6775 instr[30] = half (0)/full (1) 6776 instr[29,24] = 10 1110 6777 instr[23,22] = size 6778 instr[21,10] = 10 0000 0100 10 6779 instr[9,5] = Vn 6780 instr[4.0] = Vd. */ 6781 6782 unsigned vn = INSTR (9, 5); 6783 unsigned vd = INSTR (4, 0); 6784 unsigned i; 6785 int full = INSTR (30,30); 6786 6787 NYI_assert (29, 24, 0x2E); 6788 NYI_assert (21, 10, 0x812); 6789 6790 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6791 switch (INSTR (23, 22)) 6792 { 6793 case 0: 6794 for (i = 0; i < (full ? 16 : 8); i++) 6795 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8)); 6796 break; 6797 case 1: 6798 for (i = 0; i < (full ? 8 : 4); i++) 6799 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16)); 6800 break; 6801 case 2: 6802 for (i = 0; i < (full ? 4 : 2); i++) 6803 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32)); 6804 break; 6805 case 3: 6806 if (! 
full) 6807 HALT_UNALLOC; 6808 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64)); 6809 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64)); 6810 break; 6811 } 6812 } 6813 6814 static void 6815 do_vec_MOV_element (sim_cpu *cpu) 6816 { 6817 /* instr[31,21] = 0110 1110 000 6818 instr[20,16] = size & dest index 6819 instr[15] = 0 6820 instr[14,11] = source index 6821 instr[10] = 1 6822 instr[9,5] = Vs 6823 instr[4.0] = Vd. */ 6824 6825 unsigned vs = INSTR (9, 5); 6826 unsigned vd = INSTR (4, 0); 6827 unsigned src_index; 6828 unsigned dst_index; 6829 6830 NYI_assert (31, 21, 0x370); 6831 NYI_assert (15, 15, 0); 6832 NYI_assert (10, 10, 1); 6833 6834 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6835 if (INSTR (16, 16)) 6836 { 6837 /* Move a byte. */ 6838 src_index = INSTR (14, 11); 6839 dst_index = INSTR (20, 17); 6840 aarch64_set_vec_u8 (cpu, vd, dst_index, 6841 aarch64_get_vec_u8 (cpu, vs, src_index)); 6842 } 6843 else if (INSTR (17, 17)) 6844 { 6845 /* Move 16-bits. */ 6846 NYI_assert (11, 11, 0); 6847 src_index = INSTR (14, 12); 6848 dst_index = INSTR (20, 18); 6849 aarch64_set_vec_u16 (cpu, vd, dst_index, 6850 aarch64_get_vec_u16 (cpu, vs, src_index)); 6851 } 6852 else if (INSTR (18, 18)) 6853 { 6854 /* Move 32-bits. */ 6855 NYI_assert (12, 11, 0); 6856 src_index = INSTR (14, 13); 6857 dst_index = INSTR (20, 19); 6858 aarch64_set_vec_u32 (cpu, vd, dst_index, 6859 aarch64_get_vec_u32 (cpu, vs, src_index)); 6860 } 6861 else 6862 { 6863 NYI_assert (19, 19, 1); 6864 NYI_assert (13, 11, 0); 6865 src_index = INSTR (14, 14); 6866 dst_index = INSTR (20, 20); 6867 aarch64_set_vec_u64 (cpu, vd, dst_index, 6868 aarch64_get_vec_u64 (cpu, vs, src_index)); 6869 } 6870 } 6871 6872 static void 6873 do_vec_REV32 (sim_cpu *cpu) 6874 { 6875 /* instr[31] = 0 6876 instr[30] = full/half 6877 instr[29,24] = 10 1110 6878 instr[23,22] = size 6879 instr[21,10] = 10 0000 0000 10 6880 instr[9,5] = Rn 6881 instr[4,0] = Rd. */ 6882 6883 unsigned rn = INSTR (9, 5); 6884 unsigned rd = INSTR (4, 0); 6885 unsigned size = INSTR (23, 22); 6886 unsigned full = INSTR (30, 30); 6887 unsigned i; 6888 FRegister val; 6889 6890 NYI_assert (29, 24, 0x2E); 6891 NYI_assert (21, 10, 0x802); 6892 6893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6894 switch (size) 6895 { 6896 case 0: 6897 for (i = 0; i < (full ? 16 : 8); i++) 6898 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i); 6899 break; 6900 6901 case 1: 6902 for (i = 0; i < (full ? 8 : 4); i++) 6903 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i); 6904 break; 6905 6906 default: 6907 HALT_UNALLOC; 6908 } 6909 6910 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); 6911 if (full) 6912 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); 6913 } 6914 6915 static void 6916 do_vec_EXT (sim_cpu *cpu) 6917 { 6918 /* instr[31] = 0 6919 instr[30] = full/half 6920 instr[29,21] = 10 1110 000 6921 instr[20,16] = Vm 6922 instr[15] = 0 6923 instr[14,11] = source index 6924 instr[10] = 0 6925 instr[9,5] = Vn 6926 instr[4.0] = Vd. */ 6927 6928 unsigned vm = INSTR (20, 16); 6929 unsigned vn = INSTR (9, 5); 6930 unsigned vd = INSTR (4, 0); 6931 unsigned src_index = INSTR (14, 11); 6932 unsigned full = INSTR (30, 30); 6933 unsigned i; 6934 unsigned j; 6935 FRegister val; 6936 6937 NYI_assert (31, 21, 0x370); 6938 NYI_assert (15, 15, 0); 6939 NYI_assert (10, 10, 0); 6940 6941 if (!full && (src_index & 0x8)) 6942 HALT_UNALLOC; 6943 6944 j = 0; 6945 6946 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 6947 for (i = src_index; i < (full ? 
16 : 8); i++) 6948 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i); 6949 for (i = 0; i < src_index; i++) 6950 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i); 6951 6952 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]); 6953 if (full) 6954 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]); 6955 } 6956 6957 static void 6958 dexAdvSIMD0 (sim_cpu *cpu) 6959 { 6960 /* instr [28,25] = 0 111. */ 6961 if ( INSTR (15, 10) == 0x07 6962 && (INSTR (9, 5) == 6963 INSTR (20, 16))) 6964 { 6965 if (INSTR (31, 21) == 0x075 6966 || INSTR (31, 21) == 0x275) 6967 { 6968 do_vec_MOV_whole_vector (cpu); 6969 return; 6970 } 6971 } 6972 6973 if (INSTR (29, 19) == 0x1E0) 6974 { 6975 do_vec_MOV_immediate (cpu); 6976 return; 6977 } 6978 6979 if (INSTR (29, 19) == 0x5E0) 6980 { 6981 do_vec_MVNI (cpu); 6982 return; 6983 } 6984 6985 if (INSTR (29, 19) == 0x1C0 6986 || INSTR (29, 19) == 0x1C1) 6987 { 6988 if (INSTR (15, 10) == 0x03) 6989 { 6990 do_vec_DUP_scalar_into_vector (cpu); 6991 return; 6992 } 6993 } 6994 6995 switch (INSTR (29, 24)) 6996 { 6997 case 0x0E: do_vec_op1 (cpu); return; 6998 case 0x0F: do_vec_op2 (cpu); return; 6999 7000 case 0x2E: 7001 if (INSTR (21, 21) == 1) 7002 { 7003 switch (INSTR (15, 10)) 7004 { 7005 case 0x02: 7006 do_vec_REV32 (cpu); 7007 return; 7008 7009 case 0x07: 7010 switch (INSTR (23, 22)) 7011 { 7012 case 0: do_vec_EOR (cpu); return; 7013 case 1: do_vec_BSL (cpu); return; 7014 case 2: 7015 case 3: do_vec_bit (cpu); return; 7016 } 7017 break; 7018 7019 case 0x08: do_vec_sub_long (cpu); return; 7020 case 0x11: do_vec_USHL (cpu); return; 7021 case 0x12: do_vec_CLZ (cpu); return; 7022 case 0x16: do_vec_NOT (cpu); return; 7023 case 0x19: do_vec_max (cpu); return; 7024 case 0x1B: do_vec_min (cpu); return; 7025 case 0x21: do_vec_SUB (cpu); return; 7026 case 0x25: do_vec_MLS (cpu); return; 7027 case 0x31: do_vec_FminmaxNMP (cpu); return; 7028 case 0x35: do_vec_FADDP (cpu); return; 7029 case 0x37: do_vec_FMUL (cpu); return; 7030 case 0x3F: do_vec_FDIV (cpu); return; 7031 7032 case 0x3E: 7033 switch (INSTR (20, 16)) 7034 { 7035 case 0x00: do_vec_FNEG (cpu); return; 7036 case 0x01: do_vec_FSQRT (cpu); return; 7037 default: HALT_NYI; 7038 } 7039 7040 case 0x0D: 7041 case 0x0F: 7042 case 0x22: 7043 case 0x23: 7044 case 0x26: 7045 case 0x2A: 7046 case 0x32: 7047 case 0x36: 7048 case 0x39: 7049 case 0x3A: 7050 do_vec_compare (cpu); return; 7051 7052 default: 7053 break; 7054 } 7055 } 7056 7057 if (INSTR (31, 21) == 0x370) 7058 { 7059 if (INSTR (10, 10)) 7060 do_vec_MOV_element (cpu); 7061 else 7062 do_vec_EXT (cpu); 7063 return; 7064 } 7065 7066 switch (INSTR (21, 10)) 7067 { 7068 case 0x82E: do_vec_neg (cpu); return; 7069 case 0x87E: do_vec_sqrt (cpu); return; 7070 default: 7071 if (INSTR (15, 10) == 0x30) 7072 { 7073 do_vec_mull (cpu); 7074 return; 7075 } 7076 break; 7077 } 7078 break; 7079 7080 case 0x2f: 7081 switch (INSTR (15, 10)) 7082 { 7083 case 0x01: do_vec_SSHR_USHR (cpu); return; 7084 case 0x10: 7085 case 0x12: do_vec_mls_indexed (cpu); return; 7086 case 0x29: do_vec_xtl (cpu); return; 7087 default: 7088 HALT_NYI; 7089 } 7090 7091 default: 7092 break; 7093 } 7094 7095 HALT_NYI; 7096 } 7097 7098 /* 3 sources. */ 7099 7100 /* Float multiply add. 
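   Computes Sd = Sa + Sn * Sm. The emulation below uses a separate
   C multiply and add, so the product is normally rounded before
   the addition; a hardware FMADD applies a single fused rounding
   (unless the compiler happens to contract the expression).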
*/ 7101 static void 7102 fmadds (sim_cpu *cpu) 7103 { 7104 unsigned sa = INSTR (14, 10); 7105 unsigned sm = INSTR (20, 16); 7106 unsigned sn = INSTR ( 9, 5); 7107 unsigned sd = INSTR ( 4, 0); 7108 7109 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7110 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa) 7111 + aarch64_get_FP_float (cpu, sn) 7112 * aarch64_get_FP_float (cpu, sm)); 7113 } 7114 7115 /* Double multiply add. */ 7116 static void 7117 fmaddd (sim_cpu *cpu) 7118 { 7119 unsigned sa = INSTR (14, 10); 7120 unsigned sm = INSTR (20, 16); 7121 unsigned sn = INSTR ( 9, 5); 7122 unsigned sd = INSTR ( 4, 0); 7123 7124 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7125 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa) 7126 + aarch64_get_FP_double (cpu, sn) 7127 * aarch64_get_FP_double (cpu, sm)); 7128 } 7129 7130 /* Float multiply subtract. */ 7131 static void 7132 fmsubs (sim_cpu *cpu) 7133 { 7134 unsigned sa = INSTR (14, 10); 7135 unsigned sm = INSTR (20, 16); 7136 unsigned sn = INSTR ( 9, 5); 7137 unsigned sd = INSTR ( 4, 0); 7138 7139 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7140 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa) 7141 - aarch64_get_FP_float (cpu, sn) 7142 * aarch64_get_FP_float (cpu, sm)); 7143 } 7144 7145 /* Double multiply subtract. */ 7146 static void 7147 fmsubd (sim_cpu *cpu) 7148 { 7149 unsigned sa = INSTR (14, 10); 7150 unsigned sm = INSTR (20, 16); 7151 unsigned sn = INSTR ( 9, 5); 7152 unsigned sd = INSTR ( 4, 0); 7153 7154 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7155 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa) 7156 - aarch64_get_FP_double (cpu, sn) 7157 * aarch64_get_FP_double (cpu, sm)); 7158 } 7159 7160 /* Float negative multiply add. */ 7161 static void 7162 fnmadds (sim_cpu *cpu) 7163 { 7164 unsigned sa = INSTR (14, 10); 7165 unsigned sm = INSTR (20, 16); 7166 unsigned sn = INSTR ( 9, 5); 7167 unsigned sd = INSTR ( 4, 0); 7168 7169 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7170 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa) 7171 + (- aarch64_get_FP_float (cpu, sn)) 7172 * aarch64_get_FP_float (cpu, sm)); 7173 } 7174 7175 /* Double negative multiply add. */ 7176 static void 7177 fnmaddd (sim_cpu *cpu) 7178 { 7179 unsigned sa = INSTR (14, 10); 7180 unsigned sm = INSTR (20, 16); 7181 unsigned sn = INSTR ( 9, 5); 7182 unsigned sd = INSTR ( 4, 0); 7183 7184 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7185 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa) 7186 + (- aarch64_get_FP_double (cpu, sn)) 7187 * aarch64_get_FP_double (cpu, sm)); 7188 } 7189 7190 /* Float negative multiply subtract. */ 7191 static void 7192 fnmsubs (sim_cpu *cpu) 7193 { 7194 unsigned sa = INSTR (14, 10); 7195 unsigned sm = INSTR (20, 16); 7196 unsigned sn = INSTR ( 9, 5); 7197 unsigned sd = INSTR ( 4, 0); 7198 7199 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7200 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa) 7201 + aarch64_get_FP_float (cpu, sn) 7202 * aarch64_get_FP_float (cpu, sm)); 7203 } 7204 7205 /* Double negative multiply subtract. 
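   For reference, the four negated forms (fnmadds/fnmaddd and
   fnmsubs/fnmsubd) compute
     FNMADD: d = -a - n * m
     FNMSUB: d = -a + n * m
   in the selected precision, matching the architected definitions.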
*/ 7206 static void 7207 fnmsubd (sim_cpu *cpu) 7208 { 7209 unsigned sa = INSTR (14, 10); 7210 unsigned sm = INSTR (20, 16); 7211 unsigned sn = INSTR ( 9, 5); 7212 unsigned sd = INSTR ( 4, 0); 7213 7214 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7215 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa) 7216 + aarch64_get_FP_double (cpu, sn) 7217 * aarch64_get_FP_double (cpu, sm)); 7218 } 7219 7220 static void 7221 dexSimpleFPDataProc3Source (sim_cpu *cpu) 7222 { 7223 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC 7224 instr[30] = 0 7225 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC 7226 instr[28,25] = 1111 7227 instr[24] = 1 7228 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC 7229 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated 7230 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */ 7231 7232 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); 7233 /* dispatch on combined type:o1:o2. */ 7234 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15); 7235 7236 if (M_S != 0) 7237 HALT_UNALLOC; 7238 7239 switch (dispatch) 7240 { 7241 case 0: fmadds (cpu); return; 7242 case 1: fmsubs (cpu); return; 7243 case 2: fnmadds (cpu); return; 7244 case 3: fnmsubs (cpu); return; 7245 case 4: fmaddd (cpu); return; 7246 case 5: fmsubd (cpu); return; 7247 case 6: fnmaddd (cpu); return; 7248 case 7: fnmsubd (cpu); return; 7249 default: 7250 /* type > 1 is currently unallocated. */ 7251 HALT_UNALLOC; 7252 } 7253 } 7254 7255 static void 7256 dexSimpleFPFixedConvert (sim_cpu *cpu) 7257 { 7258 HALT_NYI; 7259 } 7260 7261 static void 7262 dexSimpleFPCondCompare (sim_cpu *cpu) 7263 { 7264 /* instr [31,23] = 0001 1110 0 7265 instr [22] = type 7266 instr [21] = 1 7267 instr [20,16] = Rm 7268 instr [15,12] = condition 7269 instr [11,10] = 01 7270 instr [9,5] = Rn 7271 instr [4] = 0 7272 instr [3,0] = nzcv */ 7273 7274 unsigned rm = INSTR (20, 16); 7275 unsigned rn = INSTR (9, 5); 7276 7277 NYI_assert (31, 23, 0x3C); 7278 NYI_assert (11, 10, 0x1); 7279 NYI_assert (4, 4, 0); 7280 7281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7282 if (! testConditionCode (cpu, INSTR (15, 12))) 7283 { 7284 aarch64_set_CPSR (cpu, INSTR (3, 0)); 7285 return; 7286 } 7287 7288 if (INSTR (22, 22)) 7289 { 7290 /* Double precision. */ 7291 double val1 = aarch64_get_vec_double (cpu, rn, 0); 7292 double val2 = aarch64_get_vec_double (cpu, rm, 0); 7293 7294 /* FIXME: Check for NaNs. */ 7295 if (val1 == val2) 7296 aarch64_set_CPSR (cpu, (Z | C)); 7297 else if (val1 < val2) 7298 aarch64_set_CPSR (cpu, N); 7299 else /* val1 > val2 */ 7300 aarch64_set_CPSR (cpu, C); 7301 } 7302 else 7303 { 7304 /* Single precision. */ 7305 float val1 = aarch64_get_vec_float (cpu, rn, 0); 7306 float val2 = aarch64_get_vec_float (cpu, rm, 0); 7307 7308 /* FIXME: Check for NaNs. */ 7309 if (val1 == val2) 7310 aarch64_set_CPSR (cpu, (Z | C)); 7311 else if (val1 < val2) 7312 aarch64_set_CPSR (cpu, N); 7313 else /* val1 > val2 */ 7314 aarch64_set_CPSR (cpu, C); 7315 } 7316 } 7317 7318 /* 2 sources. */ 7319 7320 /* Float add. */ 7321 static void 7322 fadds (sim_cpu *cpu) 7323 { 7324 unsigned sm = INSTR (20, 16); 7325 unsigned sn = INSTR ( 9, 5); 7326 unsigned sd = INSTR ( 4, 0); 7327 7328 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7329 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) 7330 + aarch64_get_FP_float (cpu, sm)); 7331 } 7332 7333 /* Double add. 
*/ 7334 static void 7335 faddd (sim_cpu *cpu) 7336 { 7337 unsigned sm = INSTR (20, 16); 7338 unsigned sn = INSTR ( 9, 5); 7339 unsigned sd = INSTR ( 4, 0); 7340 7341 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7342 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) 7343 + aarch64_get_FP_double (cpu, sm)); 7344 } 7345 7346 /* Float divide. */ 7347 static void 7348 fdivs (sim_cpu *cpu) 7349 { 7350 unsigned sm = INSTR (20, 16); 7351 unsigned sn = INSTR ( 9, 5); 7352 unsigned sd = INSTR ( 4, 0); 7353 7354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7355 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) 7356 / aarch64_get_FP_float (cpu, sm)); 7357 } 7358 7359 /* Double divide. */ 7360 static void 7361 fdivd (sim_cpu *cpu) 7362 { 7363 unsigned sm = INSTR (20, 16); 7364 unsigned sn = INSTR ( 9, 5); 7365 unsigned sd = INSTR ( 4, 0); 7366 7367 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7368 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) 7369 / aarch64_get_FP_double (cpu, sm)); 7370 } 7371 7372 /* Float multiply. */ 7373 static void 7374 fmuls (sim_cpu *cpu) 7375 { 7376 unsigned sm = INSTR (20, 16); 7377 unsigned sn = INSTR ( 9, 5); 7378 unsigned sd = INSTR ( 4, 0); 7379 7380 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7381 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) 7382 * aarch64_get_FP_float (cpu, sm)); 7383 } 7384 7385 /* Double multiply. */ 7386 static void 7387 fmuld (sim_cpu *cpu) 7388 { 7389 unsigned sm = INSTR (20, 16); 7390 unsigned sn = INSTR ( 9, 5); 7391 unsigned sd = INSTR ( 4, 0); 7392 7393 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7394 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) 7395 * aarch64_get_FP_double (cpu, sm)); 7396 } 7397 7398 /* Float negate and multiply. */ 7399 static void 7400 fnmuls (sim_cpu *cpu) 7401 { 7402 unsigned sm = INSTR (20, 16); 7403 unsigned sn = INSTR ( 9, 5); 7404 unsigned sd = INSTR ( 4, 0); 7405 7406 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7407 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn) 7408 * aarch64_get_FP_float (cpu, sm))); 7409 } 7410 7411 /* Double negate and multiply. */ 7412 static void 7413 fnmuld (sim_cpu *cpu) 7414 { 7415 unsigned sm = INSTR (20, 16); 7416 unsigned sn = INSTR ( 9, 5); 7417 unsigned sd = INSTR ( 4, 0); 7418 7419 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7420 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn) 7421 * aarch64_get_FP_double (cpu, sm))); 7422 } 7423 7424 /* Float subtract. */ 7425 static void 7426 fsubs (sim_cpu *cpu) 7427 { 7428 unsigned sm = INSTR (20, 16); 7429 unsigned sn = INSTR ( 9, 5); 7430 unsigned sd = INSTR ( 4, 0); 7431 7432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7433 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) 7434 - aarch64_get_FP_float (cpu, sm)); 7435 } 7436 7437 /* Double subtract. 
*/
7438 static void
7439 fsubd (sim_cpu *cpu)
7440 {
7441 unsigned sm = INSTR (20, 16);
7442 unsigned sn = INSTR ( 9, 5);
7443 unsigned sd = INSTR ( 4, 0);
7444
7445 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7446 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7447 - aarch64_get_FP_double (cpu, sm));
7448 }
7449
7450 static void
7451 do_FMINNM (sim_cpu *cpu)
7452 {
7453 /* instr[31,23] = 0 0011 1100
7454 instr[22] = float(0)/double(1)
7455 instr[21] = 1
7456 instr[20,16] = Sm
7457 instr[15,10] = 01 1110
7458 instr[9,5] = Sn
7459 instr[4,0] = Sd */
7460
7461 unsigned sm = INSTR (20, 16);
7462 unsigned sn = INSTR ( 9, 5);
7463 unsigned sd = INSTR ( 4, 0);
7464
7465 NYI_assert (31, 23, 0x03C);
7466 NYI_assert (15, 10, 0x1E);
7467
7468 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7469 if (INSTR (22, 22))
7470 aarch64_set_FP_double (cpu, sd,
7471 dminnm (aarch64_get_FP_double (cpu, sn),
7472 aarch64_get_FP_double (cpu, sm)));
7473 else
7474 aarch64_set_FP_float (cpu, sd,
7475 fminnm (aarch64_get_FP_float (cpu, sn),
7476 aarch64_get_FP_float (cpu, sm)));
7477 }
7478
7479 static void
7480 do_FMAXNM (sim_cpu *cpu)
7481 {
7482 /* instr[31,23] = 0 0011 1100
7483 instr[22] = float(0)/double(1)
7484 instr[21] = 1
7485 instr[20,16] = Sm
7486 instr[15,10] = 01 1010
7487 instr[9,5] = Sn
7488 instr[4,0] = Sd */
7489
7490 unsigned sm = INSTR (20, 16);
7491 unsigned sn = INSTR ( 9, 5);
7492 unsigned sd = INSTR ( 4, 0);
7493
7494 NYI_assert (31, 23, 0x03C);
7495 NYI_assert (15, 10, 0x1A);
7496
7497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7498 if (INSTR (22, 22))
7499 aarch64_set_FP_double (cpu, sd,
7500 dmaxnm (aarch64_get_FP_double (cpu, sn),
7501 aarch64_get_FP_double (cpu, sm)));
7502 else
7503 aarch64_set_FP_float (cpu, sd,
7504 fmaxnm (aarch64_get_FP_float (cpu, sn),
7505 aarch64_get_FP_float (cpu, sm)));
7506 }
7507
7508 static void
7509 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7510 {
7511 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7512 instr[30] = 0
7513 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7514 instr[28,25] = 1111
7515 instr[24] = 0
7516 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7517 instr[21] = 1
7518 instr[20,16] = Vm
7519 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7520 0010 ==> FADD, 0011 ==> FSUB,
7521 0100 ==> FMAX, 0101 ==> FMIN
7522 0110 ==> FMAXNM, 0111 ==> FMINNM
7523 1000 ==> FNMUL, ow ==> UNALLOC
7524 instr[11,10] = 10
7525 instr[9,5] = Vn
7526 instr[4,0] = Vd */
7527
7528 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7529 uint32_t type = INSTR (23, 22);
7530 /* Dispatch on opcode. */
7531 uint32_t dispatch = INSTR (15, 12);
7532
7533 if (type > 1)
7534 HALT_UNALLOC;
7535
7536 if (M_S != 0)
7537 HALT_UNALLOC;
7538
7539 if (type)
7540 switch (dispatch)
7541 {
7542 case 0: fmuld (cpu); return;
7543 case 1: fdivd (cpu); return;
7544 case 2: faddd (cpu); return;
7545 case 3: fsubd (cpu); return;
7546 case 6: do_FMAXNM (cpu); return;
7547 case 7: do_FMINNM (cpu); return;
7548 case 8: fnmuld (cpu); return;
7549
7550 /* Have not yet implemented fmax and fmin. */
7551 case 4:
7552 case 5:
7553 HALT_NYI;
7554
7555 default:
7556 HALT_UNALLOC;
7557 }
7558 else /* type == 0 => floats.
*/
7559 switch (dispatch)
7560 {
7561 case 0: fmuls (cpu); return;
7562 case 1: fdivs (cpu); return;
7563 case 2: fadds (cpu); return;
7564 case 3: fsubs (cpu); return;
7565 case 6: do_FMAXNM (cpu); return;
7566 case 7: do_FMINNM (cpu); return;
7567 case 8: fnmuls (cpu); return;
7568
7569 case 4:
7570 case 5:
7571 HALT_NYI;
7572
7573 default:
7574 HALT_UNALLOC;
7575 }
7576 }
7577
7578 static void
7579 dexSimpleFPCondSelect (sim_cpu *cpu)
7580 {
7581 /* FCSEL
7582 instr[31,23] = 0 0011 1100
7583 instr[22] = 0=>single 1=>double
7584 instr[21] = 1
7585 instr[20,16] = Sm
7586 instr[15,12] = cond
7587 instr[11,10] = 11
7588 instr[9,5] = Sn
7589 instr[4,0] = Sd */
7590 unsigned sm = INSTR (20, 16);
7591 unsigned sn = INSTR ( 9, 5);
7592 unsigned sd = INSTR ( 4, 0);
7593 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7594
7595 NYI_assert (31, 23, 0x03C);
7596 NYI_assert (11, 10, 0x3);
7597
7598 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7599 if (INSTR (22, 22))
7600 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7601 : aarch64_get_FP_double (cpu, sm)));
7602 else
7603 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7604 : aarch64_get_FP_float (cpu, sm)));
7605 }
7606
7607 /* Store 32 bit unscaled signed 9 bit. */
7608 static void
7609 fsturs (sim_cpu *cpu, int32_t offset)
7610 {
7611 unsigned int rn = INSTR (9, 5);
7612 unsigned int st = INSTR (4, 0);
7613
7614 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7615 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7616 aarch64_get_vec_u32 (cpu, st, 0));
7617 }
7618
7619 /* Store 64 bit unscaled signed 9 bit. */
7620 static void
7621 fsturd (sim_cpu *cpu, int32_t offset)
7622 {
7623 unsigned int rn = INSTR (9, 5);
7624 unsigned int st = INSTR (4, 0);
7625
7626 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7627 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7628 aarch64_get_vec_u64 (cpu, st, 0));
7629 }
7630
7631 /* Store 128 bit unscaled signed 9 bit. */
7632 static void
7633 fsturq (sim_cpu *cpu, int32_t offset)
7634 {
7635 unsigned int rn = INSTR (9, 5);
7636 unsigned int st = INSTR (4, 0);
7637 FRegister a;
7638
7639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7640 aarch64_get_FP_long_double (cpu, st, & a);
7641 aarch64_set_mem_long_double (cpu,
7642 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7643 + offset, a);
7644 }
7645
7646 /* TODO FP move register. */
7647
7648 /* 32 bit fp to fp move register. */
7649 static void
7650 ffmovs (sim_cpu *cpu)
7651 {
7652 unsigned int rn = INSTR (9, 5);
7653 unsigned int st = INSTR (4, 0);
7654
7655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7656 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7657 }
7658
7659 /* 64 bit fp to fp move register. */
7660 static void
7661 ffmovd (sim_cpu *cpu)
7662 {
7663 unsigned int rn = INSTR (9, 5);
7664 unsigned int st = INSTR (4, 0);
7665
7666 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7667 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7668 }
7669
7670 /* 32 bit GReg to Vec move register. */
7671 static void
7672 fgmovs (sim_cpu *cpu)
7673 {
7674 unsigned int rn = INSTR (9, 5);
7675 unsigned int st = INSTR (4, 0);
7676
7677 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7678 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7679 }
7680
7681 /* 64 bit g to fp move register.
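   Like its 32 bit counterpart above, this is a raw bit copy
   (FMOV Dn, Xn) rather than a numeric conversion: the 64 bit
   pattern from the general register lands unchanged in the low
   lane of the vector register.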
*/ 7682 static void 7683 fgmovd (sim_cpu *cpu) 7684 { 7685 unsigned int rn = INSTR (9, 5); 7686 unsigned int st = INSTR (4, 0); 7687 7688 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7689 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP)); 7690 } 7691 7692 /* 32 bit fp to g move register. */ 7693 static void 7694 gfmovs (sim_cpu *cpu) 7695 { 7696 unsigned int rn = INSTR (9, 5); 7697 unsigned int st = INSTR (4, 0); 7698 7699 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7700 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0)); 7701 } 7702 7703 /* 64 bit fp to g move register. */ 7704 static void 7705 gfmovd (sim_cpu *cpu) 7706 { 7707 unsigned int rn = INSTR (9, 5); 7708 unsigned int st = INSTR (4, 0); 7709 7710 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7711 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0)); 7712 } 7713 7714 /* FP move immediate 7715 7716 These install an immediate 8 bit value in the target register 7717 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3 7718 bit exponent. */ 7719 7720 static void 7721 fmovs (sim_cpu *cpu) 7722 { 7723 unsigned int sd = INSTR (4, 0); 7724 uint32_t imm = INSTR (20, 13); 7725 float f = fp_immediate_for_encoding_32 (imm); 7726 7727 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7728 aarch64_set_FP_float (cpu, sd, f); 7729 } 7730 7731 static void 7732 fmovd (sim_cpu *cpu) 7733 { 7734 unsigned int sd = INSTR (4, 0); 7735 uint32_t imm = INSTR (20, 13); 7736 double d = fp_immediate_for_encoding_64 (imm); 7737 7738 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7739 aarch64_set_FP_double (cpu, sd, d); 7740 } 7741 7742 static void 7743 dexSimpleFPImmediate (sim_cpu *cpu) 7744 { 7745 /* instr[31,23] == 00111100 7746 instr[22] == type : single(0)/double(1) 7747 instr[21] == 1 7748 instr[20,13] == imm8 7749 instr[12,10] == 100 7750 instr[9,5] == imm5 : 00000 ==> PK, ow ==> UNALLOC 7751 instr[4,0] == Rd */ 7752 uint32_t imm5 = INSTR (9, 5); 7753 7754 NYI_assert (31, 23, 0x3C); 7755 7756 if (imm5 != 0) 7757 HALT_UNALLOC; 7758 7759 if (INSTR (22, 22)) 7760 fmovd (cpu); 7761 else 7762 fmovs (cpu); 7763 } 7764 7765 /* TODO specific decode and execute for group Load Store. */ 7766 7767 /* TODO FP load/store single register (unscaled offset). */ 7768 7769 /* TODO load 8 bit unscaled signed 9 bit. */ 7770 /* TODO load 16 bit unscaled signed 9 bit. */ 7771 7772 /* Load 32 bit unscaled signed 9 bit. */ 7773 static void 7774 fldurs (sim_cpu *cpu, int32_t offset) 7775 { 7776 unsigned int rn = INSTR (9, 5); 7777 unsigned int st = INSTR (4, 0); 7778 7779 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7780 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 7781 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); 7782 } 7783 7784 /* Load 64 bit unscaled signed 9 bit. */ 7785 static void 7786 fldurd (sim_cpu *cpu, int32_t offset) 7787 { 7788 unsigned int rn = INSTR (9, 5); 7789 unsigned int st = INSTR (4, 0); 7790 7791 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7792 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 7793 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); 7794 } 7795 7796 /* Load 128 bit unscaled signed 9 bit. 
*/ 7797 static void 7798 fldurq (sim_cpu *cpu, int32_t offset) 7799 { 7800 unsigned int rn = INSTR (9, 5); 7801 unsigned int st = INSTR (4, 0); 7802 FRegister a; 7803 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; 7804 7805 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7806 aarch64_get_mem_long_double (cpu, addr, & a); 7807 aarch64_set_FP_long_double (cpu, st, a); 7808 } 7809 7810 /* TODO store 8 bit unscaled signed 9 bit. */ 7811 /* TODO store 16 bit unscaled signed 9 bit. */ 7812 7813 7814 /* 1 source. */ 7815 7816 /* Float absolute value. */ 7817 static void 7818 fabss (sim_cpu *cpu) 7819 { 7820 unsigned sn = INSTR (9, 5); 7821 unsigned sd = INSTR (4, 0); 7822 float value = aarch64_get_FP_float (cpu, sn); 7823 7824 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7825 aarch64_set_FP_float (cpu, sd, fabsf (value)); 7826 } 7827 7828 /* Double absolute value. */ 7829 static void 7830 fabcpu (sim_cpu *cpu) 7831 { 7832 unsigned sn = INSTR (9, 5); 7833 unsigned sd = INSTR (4, 0); 7834 double value = aarch64_get_FP_double (cpu, sn); 7835 7836 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7837 aarch64_set_FP_double (cpu, sd, fabs (value)); 7838 } 7839 7840 /* Float negative value. */ 7841 static void 7842 fnegs (sim_cpu *cpu) 7843 { 7844 unsigned sn = INSTR (9, 5); 7845 unsigned sd = INSTR (4, 0); 7846 7847 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7848 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn)); 7849 } 7850 7851 /* Double negative value. */ 7852 static void 7853 fnegd (sim_cpu *cpu) 7854 { 7855 unsigned sn = INSTR (9, 5); 7856 unsigned sd = INSTR (4, 0); 7857 7858 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7859 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn)); 7860 } 7861 7862 /* Float square root. */ 7863 static void 7864 fsqrts (sim_cpu *cpu) 7865 { 7866 unsigned sn = INSTR (9, 5); 7867 unsigned sd = INSTR (4, 0); 7868 7869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7870 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn))); 7871 } 7872 7873 /* Double square root. */ 7874 static void 7875 fsqrtd (sim_cpu *cpu) 7876 { 7877 unsigned sn = INSTR (9, 5); 7878 unsigned sd = INSTR (4, 0); 7879 7880 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7881 aarch64_set_FP_double (cpu, sd, 7882 sqrt (aarch64_get_FP_double (cpu, sn))); 7883 } 7884 7885 /* Convert double to float. */ 7886 static void 7887 fcvtds (sim_cpu *cpu) 7888 { 7889 unsigned sn = INSTR (9, 5); 7890 unsigned sd = INSTR (4, 0); 7891 7892 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 7893 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn)); 7894 } 7895 7896 /* Convert float to double. 
*/
7897 static void
7898 fcvtcpu (sim_cpu *cpu)
7899 {
7900 unsigned sn = INSTR (9, 5);
7901 unsigned sd = INSTR (4, 0);
7902
7903 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7904 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7905 }
7906
7907 static void
7908 do_FRINT (sim_cpu *cpu)
7909 {
7910 /* instr[31,23] = 0001 1110 0
7911 instr[22] = single(0)/double(1)
7912 instr[21,18] = 1001
7913 instr[17,15] = rounding mode
7914 instr[14,10] = 10000
7915 instr[9,5] = source
7916 instr[4,0] = dest */
7917
7918 float val;
7919 unsigned rs = INSTR (9, 5);
7920 unsigned rd = INSTR (4, 0);
7921 unsigned int rmode = INSTR (17, 15);
7922
7923 NYI_assert (31, 23, 0x03C);
7924 NYI_assert (21, 18, 0x9);
7925 NYI_assert (14, 10, 0x10);
7926
7927 if (rmode == 6 || rmode == 7)
7928 /* FIXME: Add support for rmode == 6 exactness check. N.B. the
7929 architecture keeps the dynamic rounding mode in FPCR.RMode;
7930 this sim reads bits [23,22] of its FPSR value instead. */
7931 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7932
7933 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7934 if (INSTR (22, 22))
7935 {
7936 double val = aarch64_get_FP_double (cpu, rs);
7937
7938 switch (rmode)
7939 {
7940 case 0: /* mode N: nearest or even. */
7941 {
7942 double rval = round (val);
7943
7944 /* round () breaks ties away from zero; if that produced
7945 an odd result, step back towards zero to the even
7946 neighbour. */
7947 if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7948 rval -= copysign (1.0, rval);
7949
7950 aarch64_set_FP_double (cpu, rd, rval);
7951 return;
7952 }
7953
7954 case 1: /* mode P: towards +inf. */
7955 aarch64_set_FP_double (cpu, rd, ceil (val));
7956 return;
7957
7958 case 2: /* mode M: towards -inf. */
7959 aarch64_set_FP_double (cpu, rd, floor (val));
7960 return;
7961
7962 case 3: /* mode Z: towards 0. */
7963 aarch64_set_FP_double (cpu, rd, trunc (val));
7964 return;
7965
7966 case 4: /* mode A: away from 0. */
7967 aarch64_set_FP_double (cpu, rd, round (val));
7968 return;
7969
7970 case 6: /* mode X: use FPCR with exactness check. */
7971 case 7: /* mode I: use FPCR mode. */
7972 HALT_NYI;
7973
7974 default:
7975 HALT_UNALLOC;
7976 }
7977 }
7978
7979 val = aarch64_get_FP_float (cpu, rs);
7980
7981 switch (rmode)
7982 {
7983 case 0: /* mode N: nearest or even. */
7984 {
7985 float rval = roundf (val);
7986
7987 /* roundf () breaks ties away from zero; if that produced
7988 an odd result, step back towards zero to the even
7989 neighbour. */
7990 if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
7991 rval -= copysignf (1.0f, rval);
7992
7993 aarch64_set_FP_float (cpu, rd, rval);
7994 return;
7995 }
7996
7997 case 1: /* mode P: towards +inf. */
7998 aarch64_set_FP_float (cpu, rd, ceilf (val));
7999 return;
8000
8001 case 2: /* mode M: towards -inf. */
8002 aarch64_set_FP_float (cpu, rd, floorf (val));
8003 return;
8004
8005 case 3: /* mode Z: towards 0. */
8006 aarch64_set_FP_float (cpu, rd, truncf (val));
8007 return;
8008
8009 case 4: /* mode A: away from 0. */
8010 aarch64_set_FP_float (cpu, rd, roundf (val));
8011 return;
8012
8013 case 6: /* mode X: use FPCR with exactness check. */
8014 case 7: /* mode I: use FPCR mode. */
8015 HALT_NYI;
8016
8017 default:
8018 HALT_UNALLOC;
8019 }
8020 }
8031
8032 /* Convert half to float.
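   The half precision conversions below rely on the cpustate
   helpers aarch64_get_FP_half and aarch64_set_FP_half, which are
   assumed to widen and narrow between IEEE half precision and the
   C float type.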
*/ 8033 static void 8034 do_FCVT_half_to_single (sim_cpu *cpu) 8035 { 8036 unsigned rn = INSTR (9, 5); 8037 unsigned rd = INSTR (4, 0); 8038 8039 NYI_assert (31, 10, 0x7B890); 8040 8041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8042 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn)); 8043 } 8044 8045 /* Convert half to double. */ 8046 static void 8047 do_FCVT_half_to_double (sim_cpu *cpu) 8048 { 8049 unsigned rn = INSTR (9, 5); 8050 unsigned rd = INSTR (4, 0); 8051 8052 NYI_assert (31, 10, 0x7B8B0); 8053 8054 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8055 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn)); 8056 } 8057 8058 static void 8059 do_FCVT_single_to_half (sim_cpu *cpu) 8060 { 8061 unsigned rn = INSTR (9, 5); 8062 unsigned rd = INSTR (4, 0); 8063 8064 NYI_assert (31, 10, 0x788F0); 8065 8066 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8067 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn)); 8068 } 8069 8070 /* Convert double to half. */ 8071 static void 8072 do_FCVT_double_to_half (sim_cpu *cpu) 8073 { 8074 unsigned rn = INSTR (9, 5); 8075 unsigned rd = INSTR (4, 0); 8076 8077 NYI_assert (31, 10, 0x798F0); 8078 8079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8080 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn)); 8081 } 8082 8083 static void 8084 dexSimpleFPDataProc1Source (sim_cpu *cpu) 8085 { 8086 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC 8087 instr[30] = 0 8088 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC 8089 instr[28,25] = 1111 8090 instr[24] = 0 8091 instr[23,22] ==> type : 00 ==> source is single, 8092 01 ==> source is double 8093 10 ==> UNALLOC 8094 11 ==> UNALLOC or source is half 8095 instr[21] = 1 8096 instr[20,15] ==> opcode : with type 00 or 01 8097 000000 ==> FMOV, 000001 ==> FABS, 8098 000010 ==> FNEG, 000011 ==> FSQRT, 8099 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double) 8100 000110 ==> UNALLOC, 000111 ==> FCVT (to half) 8101 001000 ==> FRINTN, 001001 ==> FRINTP, 8102 001010 ==> FRINTM, 001011 ==> FRINTZ, 8103 001100 ==> FRINTA, 001101 ==> UNALLOC 8104 001110 ==> FRINTX, 001111 ==> FRINTI 8105 with type 11 8106 000100 ==> FCVT (half-to-single) 8107 000101 ==> FCVT (half-to-double) 8108 instr[14,10] = 10000. */ 8109 8110 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); 8111 uint32_t type = INSTR (23, 22); 8112 uint32_t opcode = INSTR (20, 15); 8113 8114 if (M_S != 0) 8115 HALT_UNALLOC; 8116 8117 if (type == 3) 8118 { 8119 if (opcode == 4) 8120 do_FCVT_half_to_single (cpu); 8121 else if (opcode == 5) 8122 do_FCVT_half_to_double (cpu); 8123 else 8124 HALT_UNALLOC; 8125 return; 8126 } 8127 8128 if (type == 2) 8129 HALT_UNALLOC; 8130 8131 switch (opcode) 8132 { 8133 case 0: 8134 if (type) 8135 ffmovd (cpu); 8136 else 8137 ffmovs (cpu); 8138 return; 8139 8140 case 1: 8141 if (type) 8142 fabcpu (cpu); 8143 else 8144 fabss (cpu); 8145 return; 8146 8147 case 2: 8148 if (type) 8149 fnegd (cpu); 8150 else 8151 fnegs (cpu); 8152 return; 8153 8154 case 3: 8155 if (type) 8156 fsqrtd (cpu); 8157 else 8158 fsqrts (cpu); 8159 return; 8160 8161 case 4: 8162 if (type) 8163 fcvtds (cpu); 8164 else 8165 HALT_UNALLOC; 8166 return; 8167 8168 case 5: 8169 if (type) 8170 HALT_UNALLOC; 8171 fcvtcpu (cpu); 8172 return; 8173 8174 case 8: /* FRINTN etc. 
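   Opcodes 8..15 all funnel into do_FRINT, which re-decodes the
   rounding mode from instr[17,15]: FRINTN, FRINTP, FRINTM,
   FRINTZ, FRINTA, then FRINTX and FRINTI; opcode 13 has no
   rounding variant and halts below.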
*/ 8175 case 9: 8176 case 10: 8177 case 11: 8178 case 12: 8179 case 14: 8180 case 15: 8181 do_FRINT (cpu); 8182 return; 8183 8184 case 7: 8185 if (INSTR (22, 22)) 8186 do_FCVT_double_to_half (cpu); 8187 else 8188 do_FCVT_single_to_half (cpu); 8189 return; 8190 8191 case 13: 8192 HALT_NYI; 8193 8194 default: 8195 HALT_UNALLOC; 8196 } 8197 } 8198 8199 /* 32 bit signed int to float. */ 8200 static void 8201 scvtf32 (sim_cpu *cpu) 8202 { 8203 unsigned rn = INSTR (9, 5); 8204 unsigned sd = INSTR (4, 0); 8205 8206 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8207 aarch64_set_FP_float 8208 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP)); 8209 } 8210 8211 /* signed int to float. */ 8212 static void 8213 scvtf (sim_cpu *cpu) 8214 { 8215 unsigned rn = INSTR (9, 5); 8216 unsigned sd = INSTR (4, 0); 8217 8218 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8219 aarch64_set_FP_float 8220 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP)); 8221 } 8222 8223 /* 32 bit signed int to double. */ 8224 static void 8225 scvtd32 (sim_cpu *cpu) 8226 { 8227 unsigned rn = INSTR (9, 5); 8228 unsigned sd = INSTR (4, 0); 8229 8230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8231 aarch64_set_FP_double 8232 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP)); 8233 } 8234 8235 /* signed int to double. */ 8236 static void 8237 scvtd (sim_cpu *cpu) 8238 { 8239 unsigned rn = INSTR (9, 5); 8240 unsigned sd = INSTR (4, 0); 8241 8242 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8243 aarch64_set_FP_double 8244 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP)); 8245 } 8246 8247 static const float FLOAT_INT_MAX = (float) INT_MAX; 8248 static const float FLOAT_INT_MIN = (float) INT_MIN; 8249 static const double DOUBLE_INT_MAX = (double) INT_MAX; 8250 static const double DOUBLE_INT_MIN = (double) INT_MIN; 8251 static const float FLOAT_LONG_MAX = (float) LONG_MAX; 8252 static const float FLOAT_LONG_MIN = (float) LONG_MIN; 8253 static const double DOUBLE_LONG_MAX = (double) LONG_MAX; 8254 static const double DOUBLE_LONG_MIN = (double) LONG_MIN; 8255 8256 #define UINT_MIN 0 8257 #define ULONG_MIN 0 8258 static const float FLOAT_UINT_MAX = (float) UINT_MAX; 8259 static const float FLOAT_UINT_MIN = (float) UINT_MIN; 8260 static const double DOUBLE_UINT_MAX = (double) UINT_MAX; 8261 static const double DOUBLE_UINT_MIN = (double) UINT_MIN; 8262 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX; 8263 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN; 8264 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX; 8265 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN; 8266 8267 /* Check for FP exception conditions: 8268 NaN raises IO 8269 Infinity raises IO 8270 Out of Range raises IO and IX and saturates value 8271 Denormal raises ID and IX and sets to zero. 
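   As a minimal usage sketch (a hypothetical helper, not one the
   simulator defines), a saturating float to int32_t conversion
   built on this macro would look like:

     static int32_t
     sat_float_to_s32 (sim_cpu *cpu, float f)
     {
       int32_t value = (int32_t) f;     -- provisional conversion
       RAISE_EXCEPTIONS (f, value, FLOAT, INT);
       return value;                    -- clamped if out of range
     }

   This is exactly the pattern the fcvtsz* routines below follow:
   convert first, then let the macro patch up the value and FPSR.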
*/
8272 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8273 do \
8274 { \
8275 switch (fpclassify (F)) \
8276 { \
8277 case FP_INFINITE: \
8278 case FP_NAN: \
8279 aarch64_set_FPSR (cpu, IO); \
8280 if (signbit (F)) \
8281 VALUE = ITYPE##_MIN; \
8282 else \
8283 VALUE = ITYPE##_MAX; \
8284 break; \
8285 \
8286 case FP_NORMAL: \
8287 if (F >= FTYPE##_##ITYPE##_MAX) \
8288 { \
8289 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8290 VALUE = ITYPE##_MAX; \
8291 } \
8292 else if (F <= FTYPE##_##ITYPE##_MIN) \
8293 { \
8294 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8295 VALUE = ITYPE##_MIN; \
8296 } \
8297 break; \
8298 \
8299 case FP_SUBNORMAL: \
8300 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8301 VALUE = 0; \
8302 break; \
8303 \
8304 default: \
8305 case FP_ZERO: \
8306 VALUE = 0; \
8307 break; \
8308 } \
8309 } \
8310 while (0)
8311
8312 /* 32 bit convert float to signed int truncate towards zero. */
8313 static void
8314 fcvtszs32 (sim_cpu *cpu)
8315 {
8316 unsigned sn = INSTR (9, 5);
8317 unsigned rd = INSTR (4, 0);
8318 /* C's float-to-integer conversion truncates towards zero,
8319 which is exactly what FCVTZS requires. */
8320 float f = aarch64_get_FP_float (cpu, sn);
8321 int32_t value = (int32_t) f;
8322
8323 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8324
8325 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8326 /* Avoid sign extension to 64 bit. */
8327 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8328 }
8329
8330 /* 64 bit convert float to signed int truncate towards zero. */
8331 static void
8332 fcvtszs (sim_cpu *cpu)
8333 {
8334 unsigned sn = INSTR (9, 5);
8335 unsigned rd = INSTR (4, 0);
8336 float f = aarch64_get_FP_float (cpu, sn);
8337 int64_t value = (int64_t) f;
8338
8339 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8340
8341 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8342 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8343 }
8344
8345 /* 32 bit convert double to signed int truncate towards zero. */
8346 static void
8347 fcvtszd32 (sim_cpu *cpu)
8348 {
8349 unsigned sn = INSTR (9, 5);
8350 unsigned rd = INSTR (4, 0);
8351 double d = aarch64_get_FP_double (cpu, sn);
8352 int32_t value = (int32_t) d;
8353
8354 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8355
8356 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8357 /* Avoid sign extension to 64 bit. */
8358 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8359 }
8360
8361 /* 64 bit convert double to signed int truncate towards zero. */
8362 static void
8363 fcvtszd (sim_cpu *cpu)
8364 {
8365 unsigned sn = INSTR (9, 5);
8366 unsigned rd = INSTR (4, 0);
8367 double d = aarch64_get_FP_double (cpu, sn);
8368 int64_t value;
8369
8370 value = (int64_t) d;
8371
8372 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8373
8374 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8375 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8376 }
8377
8378 static void
8379 do_fcvtzu (sim_cpu *cpu)
8380 {
8381 /* instr[31] = size: 32-bit (0), 64-bit (1)
8382 instr[30,23] = 00111100
8383 instr[22] = type: single (0)/ double (1)
8384 instr[21] = enable (0)/disable(1) precision
8385 instr[20,16] = 11001
8386 instr[15,10] = precision
8387 instr[9,5] = Rs
8388 instr[4,0] = Rd. */
8389
8390 unsigned rs = INSTR (9, 5);
8391 unsigned rd = INSTR (4, 0);
8392
8393 NYI_assert (30, 23, 0x3C);
8394 NYI_assert (20, 16, 0x19);
8395
8396 if (INSTR (21, 21) != 1)
8397 /* Convert to fixed point.
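   The fixed point variant (instr[21] == 0) would additionally
   scale the result by 2^fbits, with fbits presumably derived from
   the precision field in instr[15,10]; that form is not emulated,
   hence the halt below.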
*/ 8399 HALT_NYI; 8400 8401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8402 if (INSTR (31, 31)) 8403 { 8404 /* Convert to unsigned 64-bit integer. */ 8405 if (INSTR (22, 22)) 8406 { 8407 double d = aarch64_get_FP_double (cpu, rs); 8408 uint64_t value = (uint64_t) d; 8409 8410 /* Do not raise an exception if we have reached ULONG_MAX. */ 8411 if (value != (1ULL << 63)) 8412 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG); 8413 8414 aarch64_set_reg_u64 (cpu, rd, NO_SP, value); 8415 } 8416 else 8417 { 8418 float f = aarch64_get_FP_float (cpu, rs); 8419 uint64_t value = (uint64_t) f; 8420 8421 /* Do not raise an exception if we have reached ULONG_MAX. */ 8422 if (value != (1ULL << 63)) 8423 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG); 8424 8425 aarch64_set_reg_u64 (cpu, rd, NO_SP, value); 8426 } 8427 } 8428 else 8429 { 8430 uint32_t value; 8431 8432 /* Convert to unsigned 32-bit integer. */ 8433 if (INSTR (22, 22)) 8434 { 8435 double d = aarch64_get_FP_double (cpu, rs); 8436 8437 value = (uint32_t) d; 8438 /* Do not raise an exception if we have reached UINT_MAX. */ 8439 if (value != (1UL << 31)) 8440 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT); 8441 } 8442 else 8443 { 8444 float f = aarch64_get_FP_float (cpu, rs); 8445 8446 value = (uint32_t) f; 8447 /* Do not raise an exception if we have reached UINT_MAX. */ 8448 if (value != (1UL << 31)) 8449 RAISE_EXCEPTIONS (f, value, FLOAT, UINT); 8450 } 8451 8452 aarch64_set_reg_u64 (cpu, rd, NO_SP, value); 8453 } 8454 } 8455 8456 static void 8457 do_UCVTF (sim_cpu *cpu) 8458 { 8459 /* instr[31] = size: 32-bit (0), 64-bit (1) 8460 instr[30,23] = 001 1110 0 8461 instr[22] = type: single (0)/ double (1) 8462 instr[21] = enable (0)/disable(1) precision 8463 instr[20,16] = 0 0011 8464 instr[15,10] = precision 8465 instr[9,5] = Rs 8466 instr[4,0] = Rd. */ 8467 8468 unsigned rs = INSTR (9, 5); 8469 unsigned rd = INSTR (4, 0); 8470 8471 NYI_assert (30, 23, 0x3C); 8472 NYI_assert (20, 16, 0x03); 8473 8474 if (INSTR (21, 21) != 1) 8475 HALT_NYI; 8476 8477 /* FIXME: Add exception raising. */ 8478 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8479 if (INSTR (31, 31)) 8480 { 8481 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP); 8482 8483 if (INSTR (22, 22)) 8484 aarch64_set_FP_double (cpu, rd, (double) value); 8485 else 8486 aarch64_set_FP_float (cpu, rd, (float) value); 8487 } 8488 else 8489 { 8490 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP); 8491 8492 if (INSTR (22, 22)) 8493 aarch64_set_FP_double (cpu, rd, (double) value); 8494 else 8495 aarch64_set_FP_float (cpu, rd, (float) value); 8496 } 8497 } 8498 8499 static void 8500 float_vector_move (sim_cpu *cpu) 8501 { 8502 /* instr[31,17] == 100 1111 0101 0111 8503 instr[16] ==> direction 0=> to GR, 1=> from GR 8504 instr[15,10] => ??? 8505 instr[9,5] ==> source 8506 instr[4,0] ==> dest. 
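   The ??? field (instr[15,10]) must be zero; the code below halts
   as unallocated for any other value. Note also that this moves
   the upper 64 bit lane, i.e. FMOV Xd, Vn.D[1] and
   FMOV Vd.D[1], Xn.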
*/
8507
8508 unsigned rn = INSTR (9, 5);
8509 unsigned rd = INSTR (4, 0);
8510
8511 NYI_assert (31, 17, 0x4F57);
8512
8513 if (INSTR (15, 10) != 0)
8514 HALT_UNALLOC;
8515
8516 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8517 if (INSTR (16, 16))
8518 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8519 else
8520 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8521 }
8522
8523 static void
8524 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8525 {
8526 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8527 instr[30] = 0
8528 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8529 instr[28,25] = 1111
8530 instr[24] = 0
8531 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8532 instr[21] = 1
8533 instr[20,19] = rmode
8534 instr[18,16] = opcode
8535 instr[15,10] = 10 0000 */
8536
8537 uint32_t rmode_opcode;
8538 uint32_t size_type;
8539 uint32_t type;
8540 uint32_t size;
8541 uint32_t S;
8542
8543 if (INSTR (31, 17) == 0x4F57)
8544 {
8545 float_vector_move (cpu);
8546 return;
8547 }
8548
8549 size = INSTR (31, 31);
8550 S = INSTR (29, 29);
8551 if (S != 0)
8552 HALT_UNALLOC;
8553
8554 type = INSTR (23, 22);
8555 if (type > 1)
8556 HALT_UNALLOC;
8557
8558 rmode_opcode = INSTR (20, 16);
8559 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8560
8561 switch (rmode_opcode)
8562 {
8563 case 2: /* SCVTF. */
8564 switch (size_type)
8565 {
8566 case 0: scvtf32 (cpu); return;
8567 case 1: scvtd32 (cpu); return;
8568 case 2: scvtf (cpu); return;
8569 case 3: scvtd (cpu); return;
8570 }
8571
8572 case 6: /* FMOV GR, Vec. */
8573 switch (size_type)
8574 {
8575 case 0: gfmovs (cpu); return;
8576 case 3: gfmovd (cpu); return;
8577 default: HALT_UNALLOC;
8578 }
8579
8580 case 7: /* FMOV vec, GR. */
8581 switch (size_type)
8582 {
8583 case 0: fgmovs (cpu); return;
8584 case 3: fgmovd (cpu); return;
8585 default: HALT_UNALLOC;
8586 }
8587
8588 case 24: /* FCVTZS. */
8589 switch (size_type)
8590 {
8591 case 0: fcvtszs32 (cpu); return;
8592 case 1: fcvtszd32 (cpu); return;
8593 case 2: fcvtszs (cpu); return;
8594 case 3: fcvtszd (cpu); return;
8595 }
8596
8597 case 25: do_fcvtzu (cpu); return;
8598 case 3: do_UCVTF (cpu); return;
8599
8600 case 0: /* FCVTNS. */
8601 case 1: /* FCVTNU. */
8602 case 4: /* FCVTAS. */
8603 case 5: /* FCVTAU. */
8604 case 8: /* FCVTPS. */
8605 case 9: /* FCVTPU. */
8606 case 16: /* FCVTMS. */
8607 case 17: /* FCVTMU. */
8608 default:
8609 HALT_NYI;
8610 }
8611 }
8612
8613 static void
8614 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8615 {
8616 uint32_t flags;
8617
8618 /* FIXME: Add exception raising. */
8619 if (isnan (fvalue1) || isnan (fvalue2))
8620 flags = C|V;
8621 else if (isinf (fvalue1) && isinf (fvalue2))
8622 {
8623 /* Subtracting two infinities may give a NaN. We only need to compare
8624 the signs, which we can get from isinf. (N.B. this relies on isinf
8625 returning +1/-1 according to the sign, as glibc's does; ISO C only
8626 guarantees a non-zero result for an infinity.) */
8627 int result = isinf (fvalue1) - isinf (fvalue2);
8628
8629 if (result == 0)
8630 flags = Z|C;
8631 else if (result < 0)
8632 flags = N;
8633 else /* (result > 0). */
8634 flags = C;
8635 }
8636 else
8637 {
8638 float result = fvalue1 - fvalue2;
8639
8640 if (result == 0.0)
8641 flags = Z|C;
8642 else if (result < 0)
8643 flags = N;
8644 else /* (result > 0).
*/ 8643 flags = C; 8644 } 8645 8646 aarch64_set_CPSR (cpu, flags); 8647 } 8648 8649 static void 8650 fcmps (sim_cpu *cpu) 8651 { 8652 unsigned sm = INSTR (20, 16); 8653 unsigned sn = INSTR ( 9, 5); 8654 8655 float fvalue1 = aarch64_get_FP_float (cpu, sn); 8656 float fvalue2 = aarch64_get_FP_float (cpu, sm); 8657 8658 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8659 set_flags_for_float_compare (cpu, fvalue1, fvalue2); 8660 } 8661 8662 /* Float compare to zero -- Invalid Operation exception 8663 only on signaling NaNs. */ 8664 static void 8665 fcmpzs (sim_cpu *cpu) 8666 { 8667 unsigned sn = INSTR ( 9, 5); 8668 float fvalue1 = aarch64_get_FP_float (cpu, sn); 8669 8670 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8671 set_flags_for_float_compare (cpu, fvalue1, 0.0f); 8672 } 8673 8674 /* Float compare -- Invalid Operation exception on all NaNs. */ 8675 static void 8676 fcmpes (sim_cpu *cpu) 8677 { 8678 unsigned sm = INSTR (20, 16); 8679 unsigned sn = INSTR ( 9, 5); 8680 8681 float fvalue1 = aarch64_get_FP_float (cpu, sn); 8682 float fvalue2 = aarch64_get_FP_float (cpu, sm); 8683 8684 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8685 set_flags_for_float_compare (cpu, fvalue1, fvalue2); 8686 } 8687 8688 /* Float compare to zero -- Invalid Operation exception on all NaNs. */ 8689 static void 8690 fcmpzes (sim_cpu *cpu) 8691 { 8692 unsigned sn = INSTR ( 9, 5); 8693 float fvalue1 = aarch64_get_FP_float (cpu, sn); 8694 8695 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8696 set_flags_for_float_compare (cpu, fvalue1, 0.0f); 8697 } 8698 8699 static void 8700 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2) 8701 { 8702 uint32_t flags; 8703 8704 /* FIXME: Add exception raising. */ 8705 if (isnan (dval1) || isnan (dval2)) 8706 flags = C|V; 8707 else if (isinf (dval1) && isinf (dval2)) 8708 { 8709 /* Subtracting two infinities may give a NaN. We only need to compare 8710 the signs, which we can get from isinf. */ 8711 int result = isinf (dval1) - isinf (dval2); 8712 8713 if (result == 0) 8714 flags = Z|C; 8715 else if (result < 0) 8716 flags = N; 8717 else /* (result > 0). */ 8718 flags = C; 8719 } 8720 else 8721 { 8722 double result = dval1 - dval2; 8723 8724 if (result == 0.0) 8725 flags = Z|C; 8726 else if (result < 0) 8727 flags = N; 8728 else /* (result > 0). */ 8729 flags = C; 8730 } 8731 8732 aarch64_set_CPSR (cpu, flags); 8733 } 8734 8735 /* Double compare -- Invalid Operation exception only on signaling NaNs. */ 8736 static void 8737 fcmpd (sim_cpu *cpu) 8738 { 8739 unsigned sm = INSTR (20, 16); 8740 unsigned sn = INSTR ( 9, 5); 8741 8742 double dvalue1 = aarch64_get_FP_double (cpu, sn); 8743 double dvalue2 = aarch64_get_FP_double (cpu, sm); 8744 8745 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8746 set_flags_for_double_compare (cpu, dvalue1, dvalue2); 8747 } 8748 8749 /* Double compare to zero -- Invalid Operation exception 8750 only on signaling NaNs. */ 8751 static void 8752 fcmpzd (sim_cpu *cpu) 8753 { 8754 unsigned sn = INSTR ( 9, 5); 8755 double dvalue1 = aarch64_get_FP_double (cpu, sn); 8756 8757 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8758 set_flags_for_double_compare (cpu, dvalue1, 0.0); 8759 } 8760 8761 /* Double compare -- Invalid Operation exception on all NaNs. 
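   As with the float variants above, the distinction between the
   signalling (FCMPE) and quiet (FCMP) forms is not modelled yet:
   both flavours funnel through set_flags_for_double_compare,
   which sets C|V for any NaN operand (see the FIXME there).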
*/ 8762 static void 8763 fcmped (sim_cpu *cpu) 8764 { 8765 unsigned sm = INSTR (20, 16); 8766 unsigned sn = INSTR ( 9, 5); 8767 8768 double dvalue1 = aarch64_get_FP_double (cpu, sn); 8769 double dvalue2 = aarch64_get_FP_double (cpu, sm); 8770 8771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8772 set_flags_for_double_compare (cpu, dvalue1, dvalue2); 8773 } 8774 8775 /* Double compare to zero -- Invalid Operation exception on all NaNs. */ 8776 static void 8777 fcmpzed (sim_cpu *cpu) 8778 { 8779 unsigned sn = INSTR ( 9, 5); 8780 double dvalue1 = aarch64_get_FP_double (cpu, sn); 8781 8782 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8783 set_flags_for_double_compare (cpu, dvalue1, 0.0); 8784 } 8785 8786 static void 8787 dexSimpleFPCompare (sim_cpu *cpu) 8788 { 8789 /* assert instr[28,25] == 1111 8790 instr[30:24:21:13,10] = 0011000 8791 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC 8792 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC 8793 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC 8794 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC 8795 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE, 8796 01000 ==> FCMPZ, 11000 ==> FCMPEZ, 8797 ow ==> UNALLOC */ 8798 uint32_t dispatch; 8799 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); 8800 uint32_t type = INSTR (23, 22); 8801 uint32_t op = INSTR (15, 14); 8802 uint32_t op2_2_0 = INSTR (2, 0); 8803 8804 if (op2_2_0 != 0) 8805 HALT_UNALLOC; 8806 8807 if (M_S != 0) 8808 HALT_UNALLOC; 8809 8810 if (type > 1) 8811 HALT_UNALLOC; 8812 8813 if (op != 0) 8814 HALT_UNALLOC; 8815 8816 /* dispatch on type and top 2 bits of opcode. */ 8817 dispatch = (type << 2) | INSTR (4, 3); 8818 8819 switch (dispatch) 8820 { 8821 case 0: fcmps (cpu); return; 8822 case 1: fcmpzs (cpu); return; 8823 case 2: fcmpes (cpu); return; 8824 case 3: fcmpzes (cpu); return; 8825 case 4: fcmpd (cpu); return; 8826 case 5: fcmpzd (cpu); return; 8827 case 6: fcmped (cpu); return; 8828 case 7: fcmpzed (cpu); return; 8829 } 8830 } 8831 8832 static void 8833 do_scalar_FADDP (sim_cpu *cpu) 8834 { 8835 /* instr [31,23] = 0111 1110 0 8836 instr [22] = single(0)/double(1) 8837 instr [21,10] = 11 0000 1101 10 8838 instr [9,5] = Fn 8839 instr [4,0] = Fd. */ 8840 8841 unsigned Fn = INSTR (9, 5); 8842 unsigned Fd = INSTR (4, 0); 8843 8844 NYI_assert (31, 23, 0x0FC); 8845 NYI_assert (21, 10, 0xC36); 8846 8847 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8848 if (INSTR (22, 22)) 8849 { 8850 double val1 = aarch64_get_vec_double (cpu, Fn, 0); 8851 double val2 = aarch64_get_vec_double (cpu, Fn, 1); 8852 8853 aarch64_set_FP_double (cpu, Fd, val1 + val2); 8854 } 8855 else 8856 { 8857 float val1 = aarch64_get_vec_float (cpu, Fn, 0); 8858 float val2 = aarch64_get_vec_float (cpu, Fn, 1); 8859 8860 aarch64_set_FP_float (cpu, Fd, val1 + val2); 8861 } 8862 } 8863 8864 /* Floating point absolute difference. */ 8865 8866 static void 8867 do_scalar_FABD (sim_cpu *cpu) 8868 { 8869 /* instr [31,23] = 0111 1110 1 8870 instr [22] = float(0)/double(1) 8871 instr [21] = 1 8872 instr [20,16] = Rm 8873 instr [15,10] = 1101 01 8874 instr [9, 5] = Rn 8875 instr [4, 0] = Rd. 
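
     Semantically (an illustrative sketch only; register names are
     symbolic):

       Rd = fabs  (Rn - Rm)   -- double variant, instr[22] == 1
       Rd = fabsf (Rn - Rm)   -- single variant, instr[22] == 0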
*/ 8876 8877 unsigned rm = INSTR (20, 16); 8878 unsigned rn = INSTR (9, 5); 8879 unsigned rd = INSTR (4, 0); 8880 8881 NYI_assert (31, 23, 0x0FD); 8882 NYI_assert (21, 21, 1); 8883 NYI_assert (15, 10, 0x35); 8884 8885 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8886 if (INSTR (22, 22)) 8887 aarch64_set_FP_double (cpu, rd, 8888 fabs (aarch64_get_FP_double (cpu, rn) 8889 - aarch64_get_FP_double (cpu, rm))); 8890 else 8891 aarch64_set_FP_float (cpu, rd, 8892 fabsf (aarch64_get_FP_float (cpu, rn) 8893 - aarch64_get_FP_float (cpu, rm))); 8894 } 8895 8896 static void 8897 do_scalar_CMGT (sim_cpu *cpu) 8898 { 8899 /* instr [31,21] = 0101 1110 111 8900 instr [20,16] = Rm 8901 instr [15,10] = 00 1101 8902 instr [9, 5] = Rn 8903 instr [4, 0] = Rd. */ 8904 8905 unsigned rm = INSTR (20, 16); 8906 unsigned rn = INSTR (9, 5); 8907 unsigned rd = INSTR (4, 0); 8908 8909 NYI_assert (31, 21, 0x2F7); 8910 NYI_assert (15, 10, 0x0D); 8911 8912 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8913 aarch64_set_vec_u64 (cpu, rd, 0, 8914 aarch64_get_vec_u64 (cpu, rn, 0) > 8915 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L); 8916 } 8917 8918 static void 8919 do_scalar_USHR (sim_cpu *cpu) 8920 { 8921 /* instr [31,23] = 0111 1111 0 8922 instr [22,16] = shift amount 8923 instr [15,10] = 0000 01 8924 instr [9, 5] = Rn 8925 instr [4, 0] = Rd. */ 8926 8927 unsigned amount = 128 - INSTR (22, 16); 8928 unsigned rn = INSTR (9, 5); 8929 unsigned rd = INSTR (4, 0); 8930 8931 NYI_assert (31, 23, 0x0FE); 8932 NYI_assert (15, 10, 0x01); 8933 8934 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8935 aarch64_set_vec_u64 (cpu, rd, 0, 8936 aarch64_get_vec_u64 (cpu, rn, 0) >> amount); 8937 } 8938 8939 static void 8940 do_scalar_SSHL (sim_cpu *cpu) 8941 { 8942 /* instr [31,21] = 0101 1110 111 8943 instr [20,16] = Rm 8944 instr [15,10] = 0100 01 8945 instr [9, 5] = Rn 8946 instr [4, 0] = Rd. */ 8947 8948 unsigned rm = INSTR (20, 16); 8949 unsigned rn = INSTR (9, 5); 8950 unsigned rd = INSTR (4, 0); 8951 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0); 8952 8953 NYI_assert (31, 21, 0x2F7); 8954 NYI_assert (15, 10, 0x11); 8955 8956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8957 if (shift >= 0) 8958 aarch64_set_vec_s64 (cpu, rd, 0, 8959 aarch64_get_vec_s64 (cpu, rn, 0) << shift); 8960 else 8961 aarch64_set_vec_s64 (cpu, rd, 0, 8962 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift); 8963 } 8964 8965 /* Floating point scalar compare greater than or equal to 0. */ 8966 static void 8967 do_scalar_FCMGE_zero (sim_cpu *cpu) 8968 { 8969 /* instr [31,23] = 0111 1110 1 8970 instr [22,22] = size 8971 instr [21,16] = 1000 00 8972 instr [15,10] = 1100 10 8973 instr [9, 5] = Rn 8974 instr [4, 0] = Rd. */ 8975 8976 unsigned size = INSTR (22, 22); 8977 unsigned rn = INSTR (9, 5); 8978 unsigned rd = INSTR (4, 0); 8979 8980 NYI_assert (31, 23, 0x0FD); 8981 NYI_assert (21, 16, 0x20); 8982 NYI_assert (15, 10, 0x32); 8983 8984 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 8985 if (size) 8986 aarch64_set_vec_u64 (cpu, rd, 0, 8987 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0); 8988 else 8989 aarch64_set_vec_u32 (cpu, rd, 0, 8990 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0); 8991 } 8992 8993 /* Floating point scalar compare less than or equal to 0. */ 8994 static void 8995 do_scalar_FCMLE_zero (sim_cpu *cpu) 8996 { 8997 /* instr [31,23] = 0111 1110 1 8998 instr [22,22] = size 8999 instr [21,16] = 1000 00 9000 instr [15,10] = 1101 10 9001 instr [9, 5] = Rn 9002 instr [4, 0] = Rd. 
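
     As with the other FCM<cond>-against-zero helpers here, the result
     written to Rd is an all-ones element when the comparison holds and
     all-zeros otherwise, i.e. (sketch): Rd = (Rn <= 0.0) ? ~0 : 0.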
*/ 9003 9004 unsigned size = INSTR (22, 22); 9005 unsigned rn = INSTR (9, 5); 9006 unsigned rd = INSTR (4, 0); 9007 9008 NYI_assert (31, 23, 0x0FD); 9009 NYI_assert (21, 16, 0x20); 9010 NYI_assert (15, 10, 0x36); 9011 9012 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9013 if (size) 9014 aarch64_set_vec_u64 (cpu, rd, 0, 9015 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0); 9016 else 9017 aarch64_set_vec_u32 (cpu, rd, 0, 9018 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0); 9019 } 9020 9021 /* Floating point scalar compare greater than 0. */ 9022 static void 9023 do_scalar_FCMGT_zero (sim_cpu *cpu) 9024 { 9025 /* instr [31,23] = 0101 1110 1 9026 instr [22,22] = size 9027 instr [21,16] = 1000 00 9028 instr [15,10] = 1100 10 9029 instr [9, 5] = Rn 9030 instr [4, 0] = Rd. */ 9031 9032 unsigned size = INSTR (22, 22); 9033 unsigned rn = INSTR (9, 5); 9034 unsigned rd = INSTR (4, 0); 9035 9036 NYI_assert (31, 23, 0x0BD); 9037 NYI_assert (21, 16, 0x20); 9038 NYI_assert (15, 10, 0x32); 9039 9040 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9041 if (size) 9042 aarch64_set_vec_u64 (cpu, rd, 0, 9043 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0); 9044 else 9045 aarch64_set_vec_u32 (cpu, rd, 0, 9046 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0); 9047 } 9048 9049 /* Floating point scalar compare equal to 0. */ 9050 static void 9051 do_scalar_FCMEQ_zero (sim_cpu *cpu) 9052 { 9053 /* instr [31,23] = 0101 1110 1 9054 instr [22,22] = size 9055 instr [21,16] = 1000 00 9056 instr [15,10] = 1101 10 9057 instr [9, 5] = Rn 9058 instr [4, 0] = Rd. */ 9059 9060 unsigned size = INSTR (22, 22); 9061 unsigned rn = INSTR (9, 5); 9062 unsigned rd = INSTR (4, 0); 9063 9064 NYI_assert (31, 23, 0x0BD); 9065 NYI_assert (21, 16, 0x20); 9066 NYI_assert (15, 10, 0x36); 9067 9068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9069 if (size) 9070 aarch64_set_vec_u64 (cpu, rd, 0, 9071 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0); 9072 else 9073 aarch64_set_vec_u32 (cpu, rd, 0, 9074 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0); 9075 } 9076 9077 /* Floating point scalar compare less than 0. */ 9078 static void 9079 do_scalar_FCMLT_zero (sim_cpu *cpu) 9080 { 9081 /* instr [31,23] = 0101 1110 1 9082 instr [22,22] = size 9083 instr [21,16] = 1000 00 9084 instr [15,10] = 1110 10 9085 instr [9, 5] = Rn 9086 instr [4, 0] = Rd. */ 9087 9088 unsigned size = INSTR (22, 22); 9089 unsigned rn = INSTR (9, 5); 9090 unsigned rd = INSTR (4, 0); 9091 9092 NYI_assert (31, 23, 0x0BD); 9093 NYI_assert (21, 16, 0x20); 9094 NYI_assert (15, 10, 0x3A); 9095 9096 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9097 if (size) 9098 aarch64_set_vec_u64 (cpu, rd, 0, 9099 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0); 9100 else 9101 aarch64_set_vec_u32 (cpu, rd, 0, 9102 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0); 9103 } 9104 9105 static void 9106 do_scalar_shift (sim_cpu *cpu) 9107 { 9108 /* instr [31,23] = 0101 1111 0 9109 instr [22,16] = shift amount 9110 instr [15,10] = 0101 01 [SHL] 9111 instr [15,10] = 0000 01 [SSHR] 9112 instr [9, 5] = Rn 9113 instr [4, 0] = Rd. 
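
     The shift count is encoded in instr[22,16] (immh:immb).  Worked
     examples for the 64-bit forms decoded below (illustrative only):

       SSHR Dd, Dn, #4 : instr[22,16] = 124, amount = 128 - 124 = 4
       SHL  Dd, Dn, #4 : instr[22,16] = 68,  amount = 68 - 64  = 4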
*/ 9114 9115 unsigned rn = INSTR (9, 5); 9116 unsigned rd = INSTR (4, 0); 9117 unsigned amount; 9118 9119 NYI_assert (31, 23, 0x0BE); 9120 9121 if (INSTR (22, 22) == 0) 9122 HALT_UNALLOC; 9123 9124 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9125 switch (INSTR (15, 10)) 9126 { 9127 case 0x01: /* SSHR */ 9128 amount = 128 - INSTR (22, 16); 9129 aarch64_set_vec_s64 (cpu, rd, 0, 9130 aarch64_get_vec_s64 (cpu, rn, 0) >> amount); 9131 return; 9132 case 0x15: /* SHL */ 9133 amount = INSTR (22, 16) - 64; 9134 aarch64_set_vec_u64 (cpu, rd, 0, 9135 aarch64_get_vec_u64 (cpu, rn, 0) << amount); 9136 return; 9137 default: 9138 HALT_NYI; 9139 } 9140 } 9141 9142 /* FCMEQ FCMGT FCMGE. */ 9143 static void 9144 do_scalar_FCM (sim_cpu *cpu) 9145 { 9146 /* instr [31,30] = 01 9147 instr [29] = U 9148 instr [28,24] = 1 1110 9149 instr [23] = E 9150 instr [22] = size 9151 instr [21] = 1 9152 instr [20,16] = Rm 9153 instr [15,12] = 1110 9154 instr [11] = AC 9155 instr [10] = 1 9156 instr [9, 5] = Rn 9157 instr [4, 0] = Rd. */ 9158 9159 unsigned rm = INSTR (20, 16); 9160 unsigned rn = INSTR (9, 5); 9161 unsigned rd = INSTR (4, 0); 9162 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11); 9163 unsigned result; 9164 float val1; 9165 float val2; 9166 9167 NYI_assert (31, 30, 1); 9168 NYI_assert (28, 24, 0x1E); 9169 NYI_assert (21, 21, 1); 9170 NYI_assert (15, 12, 0xE); 9171 NYI_assert (10, 10, 1); 9172 9173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9174 if (INSTR (22, 22)) 9175 { 9176 double val1 = aarch64_get_FP_double (cpu, rn); 9177 double val2 = aarch64_get_FP_double (cpu, rm); 9178 9179 switch (EUac) 9180 { 9181 case 0: /* 000 */ 9182 result = val1 == val2; 9183 break; 9184 9185 case 3: /* 011 */ 9186 val1 = fabs (val1); 9187 val2 = fabs (val2); 9188 /* Fall through. */ 9189 case 2: /* 010 */ 9190 result = val1 >= val2; 9191 break; 9192 9193 case 7: /* 111 */ 9194 val1 = fabs (val1); 9195 val2 = fabs (val2); 9196 /* Fall through. */ 9197 case 6: /* 110 */ 9198 result = val1 > val2; 9199 break; 9200 9201 default: 9202 HALT_UNALLOC; 9203 } 9204 9205 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); 9206 return; 9207 } 9208 9209 val1 = aarch64_get_FP_float (cpu, rn); 9210 val2 = aarch64_get_FP_float (cpu, rm); 9211 9212 switch (EUac) 9213 { 9214 case 0: /* 000 */ 9215 result = val1 == val2; 9216 break; 9217 9218 case 3: /* 011 */ 9219 val1 = fabsf (val1); 9220 val2 = fabsf (val2); 9221 /* Fall through. */ 9222 case 2: /* 010 */ 9223 result = val1 >= val2; 9224 break; 9225 9226 case 7: /* 111 */ 9227 val1 = fabsf (val1); 9228 val2 = fabsf (val2); 9229 /* Fall through. */ 9230 case 6: /* 110 */ 9231 result = val1 > val2; 9232 break; 9233 9234 default: 9235 HALT_UNALLOC; 9236 } 9237 9238 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); 9239 } 9240 9241 /* An alias of DUP. */ 9242 static void 9243 do_scalar_MOV (sim_cpu *cpu) 9244 { 9245 /* instr [31,21] = 0101 1110 000 9246 instr [20,16] = imm5 9247 instr [15,10] = 0000 01 9248 instr [9, 5] = Rn 9249 instr [4, 0] = Rd. */ 9250 9251 unsigned rn = INSTR (9, 5); 9252 unsigned rd = INSTR (4, 0); 9253 unsigned index; 9254 9255 NYI_assert (31, 21, 0x2F0); 9256 NYI_assert (15, 10, 0x01); 9257 9258 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9259 if (INSTR (16, 16)) 9260 { 9261 /* 8-bit. */ 9262 index = INSTR (20, 17); 9263 aarch64_set_vec_u8 9264 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index)); 9265 } 9266 else if (INSTR (17, 17)) 9267 { 9268 /* 16-bit. 
*/ 9269 index = INSTR (20, 18); 9270 aarch64_set_vec_u16 9271 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index)); 9272 } 9273 else if (INSTR (18, 18)) 9274 { 9275 /* 32-bit. */ 9276 index = INSTR (20, 19); 9277 aarch64_set_vec_u32 9278 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index)); 9279 } 9280 else if (INSTR (19, 19)) 9281 { 9282 /* 64-bit. */ 9283 index = INSTR (20, 20); 9284 aarch64_set_vec_u64 9285 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index)); 9286 } 9287 else 9288 HALT_UNALLOC; 9289 } 9290 9291 static void 9292 do_scalar_NEG (sim_cpu *cpu) 9293 { 9294 /* instr [31,10] = 0111 1110 1110 0000 1011 10 9295 instr [9, 5] = Rn 9296 instr [4, 0] = Rd. */ 9297 9298 unsigned rn = INSTR (9, 5); 9299 unsigned rd = INSTR (4, 0); 9300 9301 NYI_assert (31, 10, 0x1FB82E); 9302 9303 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9304 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0)); 9305 } 9306 9307 static void 9308 do_scalar_USHL (sim_cpu *cpu) 9309 { 9310 /* instr [31,21] = 0111 1110 111 9311 instr [20,16] = Rm 9312 instr [15,10] = 0100 01 9313 instr [9, 5] = Rn 9314 instr [4, 0] = Rd. */ 9315 9316 unsigned rm = INSTR (20, 16); 9317 unsigned rn = INSTR (9, 5); 9318 unsigned rd = INSTR (4, 0); 9319 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0); 9320 9321 NYI_assert (31, 21, 0x3F7); 9322 NYI_assert (15, 10, 0x11); 9323 9324 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9325 if (shift >= 0) 9326 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift); 9327 else 9328 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift); 9329 } 9330 9331 static void 9332 do_double_add (sim_cpu *cpu) 9333 { 9334 /* instr [31,21] = 0101 1110 111 9335 instr [20,16] = Fn 9336 instr [15,10] = 1000 01 9337 instr [9,5] = Fm 9338 instr [4,0] = Fd. */ 9339 unsigned Fd; 9340 unsigned Fm; 9341 unsigned Fn; 9342 double val1; 9343 double val2; 9344 9345 NYI_assert (31, 21, 0x2F7); 9346 NYI_assert (15, 10, 0x21); 9347 9348 Fd = INSTR (4, 0); 9349 Fm = INSTR (9, 5); 9350 Fn = INSTR (20, 16); 9351 9352 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9353 val1 = aarch64_get_FP_double (cpu, Fm); 9354 val2 = aarch64_get_FP_double (cpu, Fn); 9355 9356 aarch64_set_FP_double (cpu, Fd, val1 + val2); 9357 } 9358 9359 static void 9360 do_scalar_UCVTF (sim_cpu *cpu) 9361 { 9362 /* instr [31,23] = 0111 1110 0 9363 instr [22] = single(0)/double(1) 9364 instr [21,10] = 10 0001 1101 10 9365 instr [9,5] = rn 9366 instr [4,0] = rd. */ 9367 9368 unsigned rn = INSTR (9, 5); 9369 unsigned rd = INSTR (4, 0); 9370 9371 NYI_assert (31, 23, 0x0FC); 9372 NYI_assert (21, 10, 0x876); 9373 9374 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9375 if (INSTR (22, 22)) 9376 { 9377 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0); 9378 9379 aarch64_set_vec_double (cpu, rd, 0, (double) val); 9380 } 9381 else 9382 { 9383 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0); 9384 9385 aarch64_set_vec_float (cpu, rd, 0, (float) val); 9386 } 9387 } 9388 9389 static void 9390 do_scalar_vec (sim_cpu *cpu) 9391 { 9392 /* instr [30] = 1. */ 9393 /* instr [28,25] = 1111. 
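  Dispatch below is on instr[31,23] -- 0xBC, 0xBD and 0xBE for the
     U == 0 forms, 0xFC, 0xFD and 0xFE for the U == 1 forms -- with
     instr[15,10] (and sometimes instr[21,16]) selecting the individual
     operation.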
*/ 9394 switch (INSTR (31, 23)) 9395 { 9396 case 0xBC: 9397 switch (INSTR (15, 10)) 9398 { 9399 case 0x01: do_scalar_MOV (cpu); return; 9400 case 0x39: do_scalar_FCM (cpu); return; 9401 case 0x3B: do_scalar_FCM (cpu); return; 9402 } 9403 break; 9404 9405 case 0xBE: do_scalar_shift (cpu); return; 9406 9407 case 0xFC: 9408 switch (INSTR (15, 10)) 9409 { 9410 case 0x36: 9411 switch (INSTR (21, 16)) 9412 { 9413 case 0x30: do_scalar_FADDP (cpu); return; 9414 case 0x21: do_scalar_UCVTF (cpu); return; 9415 } 9416 HALT_NYI; 9417 case 0x39: do_scalar_FCM (cpu); return; 9418 case 0x3B: do_scalar_FCM (cpu); return; 9419 } 9420 break; 9421 9422 case 0xFD: 9423 switch (INSTR (15, 10)) 9424 { 9425 case 0x0D: do_scalar_CMGT (cpu); return; 9426 case 0x11: do_scalar_USHL (cpu); return; 9427 case 0x2E: do_scalar_NEG (cpu); return; 9428 case 0x32: do_scalar_FCMGE_zero (cpu); return; 9429 case 0x35: do_scalar_FABD (cpu); return; 9430 case 0x36: do_scalar_FCMLE_zero (cpu); return; 9431 case 0x39: do_scalar_FCM (cpu); return; 9432 case 0x3B: do_scalar_FCM (cpu); return; 9433 default: 9434 HALT_NYI; 9435 } 9436 9437 case 0xFE: do_scalar_USHR (cpu); return; 9438 9439 case 0xBD: 9440 switch (INSTR (15, 10)) 9441 { 9442 case 0x21: do_double_add (cpu); return; 9443 case 0x11: do_scalar_SSHL (cpu); return; 9444 case 0x32: do_scalar_FCMGT_zero (cpu); return; 9445 case 0x36: do_scalar_FCMEQ_zero (cpu); return; 9446 case 0x3A: do_scalar_FCMLT_zero (cpu); return; 9447 default: 9448 HALT_NYI; 9449 } 9450 9451 default: 9452 HALT_NYI; 9453 } 9454 } 9455 9456 static void 9457 dexAdvSIMD1 (sim_cpu *cpu) 9458 { 9459 /* instr [28,25] = 1 111. */ 9460 9461 /* We are currently only interested in the basic 9462 scalar fp routines which all have bit 30 = 0. */ 9463 if (INSTR (30, 30)) 9464 do_scalar_vec (cpu); 9465 9466 /* instr[24] is set for FP data processing 3-source and clear for 9467 all other basic scalar fp instruction groups. */ 9468 else if (INSTR (24, 24)) 9469 dexSimpleFPDataProc3Source (cpu); 9470 9471 /* instr[21] is clear for floating <-> fixed conversions and set for 9472 all other basic scalar fp instruction groups. */ 9473 else if (!INSTR (21, 21)) 9474 dexSimpleFPFixedConvert (cpu); 9475 9476 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source 9477 11 ==> cond select, 00 ==> other. */ 9478 else 9479 switch (INSTR (11, 10)) 9480 { 9481 case 1: dexSimpleFPCondCompare (cpu); return; 9482 case 2: dexSimpleFPDataProc2Source (cpu); return; 9483 case 3: dexSimpleFPCondSelect (cpu); return; 9484 9485 default: 9486 /* Now an ordered cascade of tests. 9487 FP immediate has instr [12] == 1. 9488 FP compare has instr [13] == 1. 9489 FP Data Proc 1 Source has instr [14] == 1. 9490 FP floating <--> integer conversions has instr [15] == 0. */ 9491 if (INSTR (12, 12)) 9492 dexSimpleFPImmediate (cpu); 9493 9494 else if (INSTR (13, 13)) 9495 dexSimpleFPCompare (cpu); 9496 9497 else if (INSTR (14, 14)) 9498 dexSimpleFPDataProc1Source (cpu); 9499 9500 else if (!INSTR (15, 15)) 9501 dexSimpleFPIntegerConvert (cpu); 9502 9503 else 9504 /* If we get here then instr[15] == 1 which means UNALLOC. */ 9505 HALT_UNALLOC; 9506 } 9507 } 9508 9509 /* PC relative addressing. */ 9510 9511 static void 9512 pcadr (sim_cpu *cpu) 9513 { 9514 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP 9515 instr[30,29] = immlo 9516 instr[23,5] = immhi. 
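
     The computation (a sketch; imm21 = immhi:immlo, sign-extended):

       ADR  : Rd = PC + imm21
       ADRP : Rd = (PC & ~0xfff) + (imm21 << 12)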
*/ 9517 uint64_t address; 9518 unsigned rd = INSTR (4, 0); 9519 uint32_t isPage = INSTR (31, 31); 9520 union { int64_t u64; uint64_t s64; } imm; 9521 uint64_t offset; 9522 9523 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5); 9524 offset = imm.u64; 9525 offset = (offset << 2) | INSTR (30, 29); 9526 9527 address = aarch64_get_PC (cpu); 9528 9529 if (isPage) 9530 { 9531 offset <<= 12; 9532 address &= ~0xfff; 9533 } 9534 9535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9536 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset); 9537 } 9538 9539 /* Specific decode and execute for group Data Processing Immediate. */ 9540 9541 static void 9542 dexPCRelAddressing (sim_cpu *cpu) 9543 { 9544 /* assert instr[28,24] = 10000. */ 9545 pcadr (cpu); 9546 } 9547 9548 /* Immediate logical. 9549 The bimm32/64 argument is constructed by replicating a 2, 4, 8, 9550 16, 32 or 64 bit sequence pulled out at decode and possibly 9551 inverting it.. 9552 9553 N.B. the output register (dest) can normally be Xn or SP 9554 the exception occurs for flag setting instructions which may 9555 only use Xn for the output (dest). The input register can 9556 never be SP. */ 9557 9558 /* 32 bit and immediate. */ 9559 static void 9560 and32 (sim_cpu *cpu, uint32_t bimm) 9561 { 9562 unsigned rn = INSTR (9, 5); 9563 unsigned rd = INSTR (4, 0); 9564 9565 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9566 aarch64_set_reg_u64 (cpu, rd, SP_OK, 9567 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm); 9568 } 9569 9570 /* 64 bit and immediate. */ 9571 static void 9572 and64 (sim_cpu *cpu, uint64_t bimm) 9573 { 9574 unsigned rn = INSTR (9, 5); 9575 unsigned rd = INSTR (4, 0); 9576 9577 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9578 aarch64_set_reg_u64 (cpu, rd, SP_OK, 9579 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm); 9580 } 9581 9582 /* 32 bit and immediate set flags. */ 9583 static void 9584 ands32 (sim_cpu *cpu, uint32_t bimm) 9585 { 9586 unsigned rn = INSTR (9, 5); 9587 unsigned rd = INSTR (4, 0); 9588 9589 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 9590 uint32_t value2 = bimm; 9591 9592 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9593 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9594 set_flags_for_binop32 (cpu, value1 & value2); 9595 } 9596 9597 /* 64 bit and immediate set flags. */ 9598 static void 9599 ands64 (sim_cpu *cpu, uint64_t bimm) 9600 { 9601 unsigned rn = INSTR (9, 5); 9602 unsigned rd = INSTR (4, 0); 9603 9604 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 9605 uint64_t value2 = bimm; 9606 9607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9608 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9609 set_flags_for_binop64 (cpu, value1 & value2); 9610 } 9611 9612 /* 32 bit exclusive or immediate. */ 9613 static void 9614 eor32 (sim_cpu *cpu, uint32_t bimm) 9615 { 9616 unsigned rn = INSTR (9, 5); 9617 unsigned rd = INSTR (4, 0); 9618 9619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9620 aarch64_set_reg_u64 (cpu, rd, SP_OK, 9621 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm); 9622 } 9623 9624 /* 64 bit exclusive or immediate. */ 9625 static void 9626 eor64 (sim_cpu *cpu, uint64_t bimm) 9627 { 9628 unsigned rn = INSTR (9, 5); 9629 unsigned rd = INSTR (4, 0); 9630 9631 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9632 aarch64_set_reg_u64 (cpu, rd, SP_OK, 9633 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm); 9634 } 9635 9636 /* 32 bit or immediate. 
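  An illustrative example (symbolic): ORR W0, W1, #0xff computes
   W0 = W1 | 0x000000ff, the bitmask having been expanded beforehand
   via the LITable lookup in dexLogicalImmediate below.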
*/ 9637 static void 9638 orr32 (sim_cpu *cpu, uint32_t bimm) 9639 { 9640 unsigned rn = INSTR (9, 5); 9641 unsigned rd = INSTR (4, 0); 9642 9643 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9644 aarch64_set_reg_u64 (cpu, rd, SP_OK, 9645 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm); 9646 } 9647 9648 /* 64 bit or immediate. */ 9649 static void 9650 orr64 (sim_cpu *cpu, uint64_t bimm) 9651 { 9652 unsigned rn = INSTR (9, 5); 9653 unsigned rd = INSTR (4, 0); 9654 9655 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9656 aarch64_set_reg_u64 (cpu, rd, SP_OK, 9657 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm); 9658 } 9659 9660 /* Logical shifted register. 9661 These allow an optional LSL, ASR, LSR or ROR to the second source 9662 register with a count up to the register bit count. 9663 N.B register args may not be SP. */ 9664 9665 /* 32 bit AND shifted register. */ 9666 static void 9667 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9668 { 9669 unsigned rm = INSTR (20, 16); 9670 unsigned rn = INSTR (9, 5); 9671 unsigned rd = INSTR (4, 0); 9672 9673 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9674 aarch64_set_reg_u64 9675 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9676 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9677 } 9678 9679 /* 64 bit AND shifted register. */ 9680 static void 9681 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9682 { 9683 unsigned rm = INSTR (20, 16); 9684 unsigned rn = INSTR (9, 5); 9685 unsigned rd = INSTR (4, 0); 9686 9687 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9688 aarch64_set_reg_u64 9689 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9690 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9691 } 9692 9693 /* 32 bit AND shifted register setting flags. */ 9694 static void 9695 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9696 { 9697 unsigned rm = INSTR (20, 16); 9698 unsigned rn = INSTR (9, 5); 9699 unsigned rd = INSTR (4, 0); 9700 9701 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 9702 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 9703 shift, count); 9704 9705 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9706 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9707 set_flags_for_binop32 (cpu, value1 & value2); 9708 } 9709 9710 /* 64 bit AND shifted register setting flags. */ 9711 static void 9712 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9713 { 9714 unsigned rm = INSTR (20, 16); 9715 unsigned rn = INSTR (9, 5); 9716 unsigned rd = INSTR (4, 0); 9717 9718 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 9719 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 9720 shift, count); 9721 9722 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9723 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9724 set_flags_for_binop64 (cpu, value1 & value2); 9725 } 9726 9727 /* 32 bit BIC shifted register. */ 9728 static void 9729 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9730 { 9731 unsigned rm = INSTR (20, 16); 9732 unsigned rn = INSTR (9, 5); 9733 unsigned rd = INSTR (4, 0); 9734 9735 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9736 aarch64_set_reg_u64 9737 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9738 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9739 } 9740 9741 /* 64 bit BIC shifted register. 
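  BIC is AND with the complement of the shifted second operand,
   i.e. (sketch): Xd = Xn & ~(shifted Xm).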
*/ 9742 static void 9743 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9744 { 9745 unsigned rm = INSTR (20, 16); 9746 unsigned rn = INSTR (9, 5); 9747 unsigned rd = INSTR (4, 0); 9748 9749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9750 aarch64_set_reg_u64 9751 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9752 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9753 } 9754 9755 /* 32 bit BIC shifted register setting flags. */ 9756 static void 9757 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9758 { 9759 unsigned rm = INSTR (20, 16); 9760 unsigned rn = INSTR (9, 5); 9761 unsigned rd = INSTR (4, 0); 9762 9763 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); 9764 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), 9765 shift, count); 9766 9767 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9768 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9769 set_flags_for_binop32 (cpu, value1 & value2); 9770 } 9771 9772 /* 64 bit BIC shifted register setting flags. */ 9773 static void 9774 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9775 { 9776 unsigned rm = INSTR (20, 16); 9777 unsigned rn = INSTR (9, 5); 9778 unsigned rd = INSTR (4, 0); 9779 9780 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); 9781 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), 9782 shift, count); 9783 9784 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9785 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); 9786 set_flags_for_binop64 (cpu, value1 & value2); 9787 } 9788 9789 /* 32 bit EON shifted register. */ 9790 static void 9791 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9792 { 9793 unsigned rm = INSTR (20, 16); 9794 unsigned rn = INSTR (9, 5); 9795 unsigned rd = INSTR (4, 0); 9796 9797 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9798 aarch64_set_reg_u64 9799 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9800 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9801 } 9802 9803 /* 64 bit EON shifted register. */ 9804 static void 9805 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9806 { 9807 unsigned rm = INSTR (20, 16); 9808 unsigned rn = INSTR (9, 5); 9809 unsigned rd = INSTR (4, 0); 9810 9811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9812 aarch64_set_reg_u64 9813 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9814 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9815 } 9816 9817 /* 32 bit EOR shifted register. */ 9818 static void 9819 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9820 { 9821 unsigned rm = INSTR (20, 16); 9822 unsigned rn = INSTR (9, 5); 9823 unsigned rd = INSTR (4, 0); 9824 9825 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9826 aarch64_set_reg_u64 9827 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9828 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9829 } 9830 9831 /* 64 bit EOR shifted register. */ 9832 static void 9833 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9834 { 9835 unsigned rm = INSTR (20, 16); 9836 unsigned rn = INSTR (9, 5); 9837 unsigned rd = INSTR (4, 0); 9838 9839 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9840 aarch64_set_reg_u64 9841 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9842 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9843 } 9844 9845 /* 32 bit ORR shifted register. 
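  N.B. ORR of the zero register with an unshifted operand provides
   the MOV (register) alias, e.g. MOV W0, W1 is ORR W0, WZR, W1.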
*/ 9846 static void 9847 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9848 { 9849 unsigned rm = INSTR (20, 16); 9850 unsigned rn = INSTR (9, 5); 9851 unsigned rd = INSTR (4, 0); 9852 9853 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9854 aarch64_set_reg_u64 9855 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9856 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9857 } 9858 9859 /* 64 bit ORR shifted register. */ 9860 static void 9861 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9862 { 9863 unsigned rm = INSTR (20, 16); 9864 unsigned rn = INSTR (9, 5); 9865 unsigned rd = INSTR (4, 0); 9866 9867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9868 aarch64_set_reg_u64 9869 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9870 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9871 } 9872 9873 /* 32 bit ORN shifted register. */ 9874 static void 9875 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9876 { 9877 unsigned rm = INSTR (20, 16); 9878 unsigned rn = INSTR (9, 5); 9879 unsigned rd = INSTR (4, 0); 9880 9881 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9882 aarch64_set_reg_u64 9883 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) 9884 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); 9885 } 9886 9887 /* 64 bit ORN shifted register. */ 9888 static void 9889 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count) 9890 { 9891 unsigned rm = INSTR (20, 16); 9892 unsigned rn = INSTR (9, 5); 9893 unsigned rd = INSTR (4, 0); 9894 9895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9896 aarch64_set_reg_u64 9897 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) 9898 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); 9899 } 9900 9901 static void 9902 dexLogicalImmediate (sim_cpu *cpu) 9903 { 9904 /* assert instr[28,23] = 1001000 9905 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 9906 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS 9907 instr[22] = N : used to construct immediate mask 9908 instr[21,16] = immr 9909 instr[15,10] = imms 9910 instr[9,5] = Rn 9911 instr[4,0] = Rd */ 9912 9913 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ 9914 uint32_t size = INSTR (31, 31); 9915 uint32_t N = INSTR (22, 22); 9916 /* uint32_t immr = INSTR (21, 16);. */ 9917 /* uint32_t imms = INSTR (15, 10);. */ 9918 uint32_t index = INSTR (22, 10); 9919 uint64_t bimm64 = LITable [index]; 9920 uint32_t dispatch = INSTR (30, 29); 9921 9922 if (~size & N) 9923 HALT_UNALLOC; 9924 9925 if (!bimm64) 9926 HALT_UNALLOC; 9927 9928 if (size == 0) 9929 { 9930 uint32_t bimm = (uint32_t) bimm64; 9931 9932 switch (dispatch) 9933 { 9934 case 0: and32 (cpu, bimm); return; 9935 case 1: orr32 (cpu, bimm); return; 9936 case 2: eor32 (cpu, bimm); return; 9937 case 3: ands32 (cpu, bimm); return; 9938 } 9939 } 9940 else 9941 { 9942 switch (dispatch) 9943 { 9944 case 0: and64 (cpu, bimm64); return; 9945 case 1: orr64 (cpu, bimm64); return; 9946 case 2: eor64 (cpu, bimm64); return; 9947 case 3: ands64 (cpu, bimm64); return; 9948 } 9949 } 9950 HALT_UNALLOC; 9951 } 9952 9953 /* Immediate move. 9954 The uimm argument is a 16 bit value to be inserted into the 9955 target register the pos argument locates the 16 bit word in the 9956 dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2, 9957 3} for 64 bit. 9958 N.B register arg may not be SP so it should be. 9959 accessed using the setGZRegisterXXX accessors. 
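
   An illustrative sequence composing a 64 bit constant with the
   helpers below (assembler syntax; values symbolic):

     MOVZ X0, #0x1234, LSL #48    -- X0 = 0x1234000000000000
     MOVK X0, #0x5678, LSL #32    -- X0 = 0x1234567800000000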
*/ 9960 9961 /* 32 bit move 16 bit immediate zero remaining shorts. */ 9962 static void 9963 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos) 9964 { 9965 unsigned rd = INSTR (4, 0); 9966 9967 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9968 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16)); 9969 } 9970 9971 /* 64 bit move 16 bit immediate zero remaining shorts. */ 9972 static void 9973 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos) 9974 { 9975 unsigned rd = INSTR (4, 0); 9976 9977 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9978 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16)); 9979 } 9980 9981 /* 32 bit move 16 bit immediate negated. */ 9982 static void 9983 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos) 9984 { 9985 unsigned rd = INSTR (4, 0); 9986 9987 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9988 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU)); 9989 } 9990 9991 /* 64 bit move 16 bit immediate negated. */ 9992 static void 9993 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos) 9994 { 9995 unsigned rd = INSTR (4, 0); 9996 9997 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 9998 aarch64_set_reg_u64 9999 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16)) 10000 ^ 0xffffffffffffffffULL)); 10001 } 10002 10003 /* 32 bit move 16 bit immediate keep remaining shorts. */ 10004 static void 10005 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos) 10006 { 10007 unsigned rd = INSTR (4, 0); 10008 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP); 10009 uint32_t value = val << (pos * 16); 10010 uint32_t mask = ~(0xffffU << (pos * 16)); 10011 10012 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 10013 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask))); 10014 } 10015 10016 /* 64 bit move 16 it immediate keep remaining shorts. */ 10017 static void 10018 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos) 10019 { 10020 unsigned rd = INSTR (4, 0); 10021 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP); 10022 uint64_t value = (uint64_t) val << (pos * 16); 10023 uint64_t mask = ~(0xffffULL << (pos * 16)); 10024 10025 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 10026 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask))); 10027 } 10028 10029 static void 10030 dexMoveWideImmediate (sim_cpu *cpu) 10031 { 10032 /* assert instr[28:23] = 100101 10033 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 10034 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK 10035 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48 10036 instr[20,5] = uimm16 10037 instr[4,0] = Rd */ 10038 10039 /* N.B. the (multiple of 16) shift is applied by the called routine, 10040 we just pass the multiplier. */ 10041 10042 uint32_t imm; 10043 uint32_t size = INSTR (31, 31); 10044 uint32_t op = INSTR (30, 29); 10045 uint32_t shift = INSTR (22, 21); 10046 10047 /* 32 bit can only shift 0 or 1 lot of 16. 10048 anything else is an unallocated instruction. 
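  As an example of the op == 0 (MOVN) path below, MOVN W0, #0
     leaves W0 == 0xffffffff, since the placed immediate is inverted.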
*/ 10049 if (size == 0 && (shift > 1)) 10050 HALT_UNALLOC; 10051 10052 if (op == 1) 10053 HALT_UNALLOC; 10054 10055 imm = INSTR (20, 5); 10056 10057 if (size == 0) 10058 { 10059 if (op == 0) 10060 movn32 (cpu, imm, shift); 10061 else if (op == 2) 10062 movz32 (cpu, imm, shift); 10063 else 10064 movk32 (cpu, imm, shift); 10065 } 10066 else 10067 { 10068 if (op == 0) 10069 movn64 (cpu, imm, shift); 10070 else if (op == 2) 10071 movz64 (cpu, imm, shift); 10072 else 10073 movk64 (cpu, imm, shift); 10074 } 10075 } 10076 10077 /* Bitfield operations. 10078 These take a pair of bit positions r and s which are in {0..31} 10079 or {0..63} depending on the instruction word size. 10080 N.B register args may not be SP. */ 10081 10082 /* OK, we start with ubfm which just needs to pick 10083 some bits out of source zero the rest and write 10084 the result to dest. Just need two logical shifts. */ 10085 10086 /* 32 bit bitfield move, left and right of affected zeroed 10087 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ 10088 static void 10089 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) 10090 { 10091 unsigned rd; 10092 unsigned rn = INSTR (9, 5); 10093 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); 10094 10095 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */ 10096 if (r <= s) 10097 { 10098 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0. 10099 We want only bits s:xxx:r at the bottom of the word 10100 so we LSL bit s up to bit 31 i.e. by 31 - s 10101 and then we LSR to bring bit 31 down to bit s - r 10102 i.e. by 31 + r - s. */ 10103 value <<= 31 - s; 10104 value >>= 31 + r - s; 10105 } 10106 else 10107 { 10108 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0 10109 We want only bits s:xxx:0 starting at it 31-(r-1) 10110 so we LSL bit s up to bit 31 i.e. by 31 - s 10111 and then we LSL to bring bit 31 down to 31-(r-1)+s 10112 i.e. by r - (s + 1). */ 10113 value <<= 31 - s; 10114 value >>= r - (s + 1); 10115 } 10116 10117 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 10118 rd = INSTR (4, 0); 10119 aarch64_set_reg_u64 (cpu, rd, NO_SP, value); 10120 } 10121 10122 /* 64 bit bitfield move, left and right of affected zeroed 10123 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ 10124 static void 10125 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s) 10126 { 10127 unsigned rd; 10128 unsigned rn = INSTR (9, 5); 10129 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); 10130 10131 if (r <= s) 10132 { 10133 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0. 10134 We want only bits s:xxx:r at the bottom of the word. 10135 So we LSL bit s up to bit 63 i.e. by 63 - s 10136 and then we LSR to bring bit 63 down to bit s - r 10137 i.e. by 63 + r - s. */ 10138 value <<= 63 - s; 10139 value >>= 63 + r - s; 10140 } 10141 else 10142 { 10143 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0. 10144 We want only bits s:xxx:0 starting at it 63-(r-1). 10145 So we LSL bit s up to bit 63 i.e. by 63 - s 10146 and then we LSL to bring bit 63 down to 63-(r-1)+s 10147 i.e. by r - (s + 1). */ 10148 value <<= 63 - s; 10149 value >>= r - (s + 1); 10150 } 10151 10152 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 10153 rd = INSTR (4, 0); 10154 aarch64_set_reg_u64 (cpu, rd, NO_SP, value); 10155 } 10156 10157 /* The signed versions need to insert sign bits 10158 on the left of the inserted bit field. 
so we do 10159 much the same as the unsigned version except we 10160 use an arithmetic shift right -- this just means 10161 we need to operate on signed values. */ 10162 10163 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */ 10164 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ 10165 static void 10166 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) 10167 { 10168 unsigned rd; 10169 unsigned rn = INSTR (9, 5); 10170 /* as per ubfm32 but use an ASR instead of an LSR. */ 10171 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP); 10172 10173 if (r <= s) 10174 { 10175 value <<= 31 - s; 10176 value >>= 31 + r - s; 10177 } 10178 else 10179 { 10180 value <<= 31 - s; 10181 value >>= r - (s + 1); 10182 } 10183 10184 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 10185 rd = INSTR (4, 0); 10186 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); 10187 } 10188 10189 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */ 10190 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ 10191 static void 10192 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s) 10193 { 10194 unsigned rd; 10195 unsigned rn = INSTR (9, 5); 10196 /* acpu per ubfm but use an ASR instead of an LSR. */ 10197 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP); 10198 10199 if (r <= s) 10200 { 10201 value <<= 63 - s; 10202 value >>= 63 + r - s; 10203 } 10204 else 10205 { 10206 value <<= 63 - s; 10207 value >>= r - (s + 1); 10208 } 10209 10210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 10211 rd = INSTR (4, 0); 10212 aarch64_set_reg_s64 (cpu, rd, NO_SP, value); 10213 } 10214 10215 /* Finally, these versions leave non-affected bits 10216 as is. so we need to generate the bits as per 10217 ubfm and also generate a mask to pick the 10218 bits from the original and computed values. */ 10219 10220 /* 32 bit bitfield move, non-affected bits left as is. 10221 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ 10222 static void 10223 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) 10224 { 10225 unsigned rn = INSTR (9, 5); 10226 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); 10227 uint32_t mask = -1; 10228 unsigned rd; 10229 uint32_t value2; 10230 10231 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */ 10232 if (r <= s) 10233 { 10234 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0. 10235 We want only bits s:xxx:r at the bottom of the word 10236 so we LSL bit s up to bit 31 i.e. by 31 - s 10237 and then we LSR to bring bit 31 down to bit s - r 10238 i.e. by 31 + r - s. */ 10239 value <<= 31 - s; 10240 value >>= 31 + r - s; 10241 /* the mask must include the same bits. */ 10242 mask <<= 31 - s; 10243 mask >>= 31 + r - s; 10244 } 10245 else 10246 { 10247 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0. 10248 We want only bits s:xxx:0 starting at it 31-(r-1) 10249 so we LSL bit s up to bit 31 i.e. by 31 - s 10250 and then we LSL to bring bit 31 down to 31-(r-1)+s 10251 i.e. by r - (s + 1). */ 10252 value <<= 31 - s; 10253 value >>= r - (s + 1); 10254 /* The mask must include the same bits. 
*/ 10255 mask <<= 31 - s; 10256 mask >>= r - (s + 1); 10257 } 10258 10259 rd = INSTR (4, 0); 10260 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP); 10261 10262 value2 &= ~mask; 10263 value2 |= value; 10264 10265 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 10266 aarch64_set_reg_u64 10267 (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value); 10268 } 10269 10270 /* 64 bit bitfield move, non-affected bits left as is. 10271 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ 10272 static void 10273 bfm (sim_cpu *cpu, uint32_t r, uint32_t s) 10274 { 10275 unsigned rd; 10276 unsigned rn = INSTR (9, 5); 10277 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); 10278 uint64_t mask = 0xffffffffffffffffULL; 10279 10280 if (r <= s) 10281 { 10282 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0. 10283 We want only bits s:xxx:r at the bottom of the word 10284 so we LSL bit s up to bit 63 i.e. by 63 - s 10285 and then we LSR to bring bit 63 down to bit s - r 10286 i.e. by 63 + r - s. */ 10287 value <<= 63 - s; 10288 value >>= 63 + r - s; 10289 /* The mask must include the same bits. */ 10290 mask <<= 63 - s; 10291 mask >>= 63 + r - s; 10292 } 10293 else 10294 { 10295 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0 10296 We want only bits s:xxx:0 starting at it 63-(r-1) 10297 so we LSL bit s up to bit 63 i.e. by 63 - s 10298 and then we LSL to bring bit 63 down to 63-(r-1)+s 10299 i.e. by r - (s + 1). */ 10300 value <<= 63 - s; 10301 value >>= r - (s + 1); 10302 /* The mask must include the same bits. */ 10303 mask <<= 63 - s; 10304 mask >>= r - (s + 1); 10305 } 10306 10307 TRACE_DECODE (cpu, "emulated at line %d", __LINE__); 10308 rd = INSTR (4, 0); 10309 aarch64_set_reg_u64 10310 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value); 10311 } 10312 10313 static void 10314 dexBitfieldImmediate (sim_cpu *cpu) 10315 { 10316 /* assert instr[28:23] = 100110 10317 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 10318 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC 10319 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC 10320 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit 10321 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit 10322 instr[9,5] = Rn 10323 instr[4,0] = Rd */ 10324 10325 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ 10326 uint32_t dispatch; 10327 uint32_t imms; 10328 uint32_t size = INSTR (31, 31); 10329 uint32_t N = INSTR (22, 22); 10330 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */ 10331 /* or else we have an UNALLOC. */ 10332 uint32_t immr = INSTR (21, 16); 10333 10334 if (~size & N) 10335 HALT_UNALLOC; 10336 10337 if (!size && uimm (immr, 5, 5)) 10338 HALT_UNALLOC; 10339 10340 imms = INSTR (15, 10); 10341 if (!size && uimm (imms, 5, 5)) 10342 HALT_UNALLOC; 10343 10344 /* Switch on combined size and op. 
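  N.B. the familiar shift mnemonics are aliases of these bitfield
     primitives; e.g. LSR W0, W1, #8 is UBFM W0, W1, #8, #31, which
     the r <= s path of ubfm32 above reduces to value <<= 0;
     value >>= 8 -- a plain logical shift right.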
*/
10345   dispatch = INSTR (31, 29);
10346   switch (dispatch)
10347     {
10348     case 0: sbfm32 (cpu, immr, imms); return;
10349     case 1: bfm32 (cpu, immr, imms); return;
10350     case 2: ubfm32 (cpu, immr, imms); return;
10351     case 4: sbfm (cpu, immr, imms); return;
10352     case 5: bfm (cpu, immr, imms); return;
10353     case 6: ubfm (cpu, immr, imms); return;
10354     default: HALT_UNALLOC;
10355     }
10356 }
10357
10358 static void
10359 do_EXTR_32 (sim_cpu *cpu)
10360 {
10361   /* instr[31:21] = 00010011100
10362      instr[20,16] = Rm
10363      instr[15,10] = imms :  0xxxxx for 32 bit
10364      instr[9,5]   = Rn
10365      instr[4,0]   = Rd  */
10366   unsigned rm   = INSTR (20, 16);
10367   unsigned imms = INSTR (15, 10) & 31;
10368   unsigned rn   = INSTR ( 9,  5);
10369   unsigned rd   = INSTR ( 4,  0);
10370   uint64_t val1;
10371   uint64_t val2;
10372
10373   val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10374   val1 >>= imms;
10375   val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10376   val2 = imms ? (val2 << (32 - imms)) : 0; /* A shift by 32 would be UB.  */
10377
10378   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10379   aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
10380 }
10381
10382 static void
10383 do_EXTR_64 (sim_cpu *cpu)
10384 {
10385   /* instr[31:21] = 10010011100
10386      instr[20,16] = Rm
10387      instr[15,10] = imms
10388      instr[9,5]   = Rn
10389      instr[4,0]   = Rd  */
10390   unsigned rm   = INSTR (20, 16);
10391   unsigned imms = INSTR (15, 10) & 63;
10392   unsigned rn   = INSTR ( 9,  5);
10393   unsigned rd   = INSTR ( 4,  0);
10394   uint64_t val;
10395
10396   val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10397   val >>= imms;
10398   val |= imms ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms)) : 0; /* Shift by 64 is UB.  */
10399
10400   aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10401 }
10402
10403 static void
10404 dexExtractImmediate (sim_cpu *cpu)
10405 {
10406   /* assert instr[28:23] = 100111
10407      instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
10408      instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10409      instr[22]    = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10410      instr[21]    = op0 : must be 0 or UNALLOC
10411      instr[20,16] = Rm
10412      instr[15,10] = imms :  0xxxxx for 32 bit, xxxxxx for 64 bit
10413      instr[9,5]   = Rn
10414      instr[4,0]   = Rd  */
10415
10416   /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
10417   /* 64 bit operations must have N = 1 or else we have an UNALLOC.  */
10418   uint32_t dispatch;
10419   uint32_t size = INSTR (31, 31);
10420   uint32_t N = INSTR (22, 22);
10421   /* 32 bit operations must have imms[5] = 0
10422      or else we have an UNALLOC.  */
10423   uint32_t imms = INSTR (15, 10);
10424
10425   if (size ^ N)
10426     HALT_UNALLOC;
10427
10428   if (!size && uimm (imms, 5, 5))
10429     HALT_UNALLOC;
10430
10431   /* Switch on combined size and op.  */
10432   dispatch = INSTR (31, 29);
10433
10434   if (dispatch == 0)
10435     do_EXTR_32 (cpu);
10436
10437   else if (dispatch == 4)
10438     do_EXTR_64 (cpu);
10439
10440   else if (dispatch == 1)
10441     HALT_NYI;
10442   else
10443     HALT_UNALLOC;
10444 }
10445
10446 static void
10447 dexDPImm (sim_cpu *cpu)
10448 {
10449   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10450      assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10451      bits [25,23] of a DPImm are the secondary dispatch vector.
*/ 10452 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu)); 10453 10454 switch (group2) 10455 { 10456 case DPIMM_PCADR_000: 10457 case DPIMM_PCADR_001: 10458 dexPCRelAddressing (cpu); 10459 return; 10460 10461 case DPIMM_ADDSUB_010: 10462 case DPIMM_ADDSUB_011: 10463 dexAddSubtractImmediate (cpu); 10464 return; 10465 10466 case DPIMM_LOG_100: 10467 dexLogicalImmediate (cpu); 10468 return; 10469 10470 case DPIMM_MOV_101: 10471 dexMoveWideImmediate (cpu); 10472 return; 10473 10474 case DPIMM_BITF_110: 10475 dexBitfieldImmediate (cpu); 10476 return; 10477 10478 case DPIMM_EXTR_111: 10479 dexExtractImmediate (cpu); 10480 return; 10481 10482 default: 10483 /* Should never reach here. */ 10484 HALT_NYI; 10485 } 10486 } 10487 10488 static void 10489 dexLoadUnscaledImmediate (sim_cpu *cpu) 10490 { 10491 /* instr[29,24] == 111_00 10492 instr[21] == 0 10493 instr[11,10] == 00 10494 instr[31,30] = size 10495 instr[26] = V 10496 instr[23,22] = opc 10497 instr[20,12] = simm9 10498 instr[9,5] = rn may be SP. */ 10499 /* unsigned rt = INSTR (4, 0); */ 10500 uint32_t V = INSTR (26, 26); 10501 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); 10502 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); 10503 10504 if (!V) 10505 { 10506 /* GReg operations. */ 10507 switch (dispatch) 10508 { 10509 case 0: sturb (cpu, imm); return; 10510 case 1: ldurb32 (cpu, imm); return; 10511 case 2: ldursb64 (cpu, imm); return; 10512 case 3: ldursb32 (cpu, imm); return; 10513 case 4: sturh (cpu, imm); return; 10514 case 5: ldurh32 (cpu, imm); return; 10515 case 6: ldursh64 (cpu, imm); return; 10516 case 7: ldursh32 (cpu, imm); return; 10517 case 8: stur32 (cpu, imm); return; 10518 case 9: ldur32 (cpu, imm); return; 10519 case 10: ldursw (cpu, imm); return; 10520 case 12: stur64 (cpu, imm); return; 10521 case 13: ldur64 (cpu, imm); return; 10522 10523 case 14: 10524 /* PRFUM NYI. */ 10525 HALT_NYI; 10526 10527 default: 10528 case 11: 10529 case 15: 10530 HALT_UNALLOC; 10531 } 10532 } 10533 10534 /* FReg operations. */ 10535 switch (dispatch) 10536 { 10537 case 2: fsturq (cpu, imm); return; 10538 case 3: fldurq (cpu, imm); return; 10539 case 8: fsturs (cpu, imm); return; 10540 case 9: fldurs (cpu, imm); return; 10541 case 12: fsturd (cpu, imm); return; 10542 case 13: fldurd (cpu, imm); return; 10543 10544 case 0: /* STUR 8 bit FP. */ 10545 case 1: /* LDUR 8 bit FP. */ 10546 case 4: /* STUR 16 bit FP. */ 10547 case 5: /* LDUR 8 bit FP. */ 10548 HALT_NYI; 10549 10550 default: 10551 case 6: 10552 case 7: 10553 case 10: 10554 case 11: 10555 case 14: 10556 case 15: 10557 HALT_UNALLOC; 10558 } 10559 } 10560 10561 /* N.B. A preliminary note regarding all the ldrs<x>32 10562 instructions 10563 10564 The signed value loaded by these instructions is cast to unsigned 10565 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the 10566 64 bit element of the GReg union. this performs a 32 bit sign extension 10567 (as required) but avoids 64 bit sign extension, thus ensuring that the 10568 top half of the register word is zero. this is what the spec demands 10569 when a 32 bit load occurs. */ 10570 10571 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */ 10572 static void 10573 ldrsb32_abs (sim_cpu *cpu, uint32_t offset) 10574 { 10575 unsigned int rn = INSTR (9, 5); 10576 unsigned int rt = INSTR (4, 0); 10577 10578 /* The target register may not be SP but the source may be 10579 there is no scaling required for a byte load. 
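  The effective computation is simply (sketch):
     address = (Xn|SP) + offset, with Rt receiving the sign-extended
     byte found there.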
*/ 10580 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; 10581 aarch64_set_reg_u64 (cpu, rt, NO_SP, 10582 (int64_t) aarch64_get_mem_s8 (cpu, address)); 10583 } 10584 10585 /* 32 bit load sign-extended byte scaled or unscaled zero- 10586 or sign-extended 32-bit register offset. */ 10587 static void 10588 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10589 { 10590 unsigned int rm = INSTR (20, 16); 10591 unsigned int rn = INSTR (9, 5); 10592 unsigned int rt = INSTR (4, 0); 10593 10594 /* rn may reference SP, rm and rt must reference ZR. */ 10595 10596 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10597 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10598 extension); 10599 10600 /* There is no scaling required for a byte load. */ 10601 aarch64_set_reg_u64 10602 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address 10603 + displacement)); 10604 } 10605 10606 /* 32 bit load sign-extended byte unscaled signed 9 bit with 10607 pre- or post-writeback. */ 10608 static void 10609 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) 10610 { 10611 uint64_t address; 10612 unsigned int rn = INSTR (9, 5); 10613 unsigned int rt = INSTR (4, 0); 10614 10615 if (rn == rt && wb != NoWriteBack) 10616 HALT_UNALLOC; 10617 10618 address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10619 10620 if (wb == Pre) 10621 address += offset; 10622 10623 aarch64_set_reg_u64 (cpu, rt, NO_SP, 10624 (int64_t) aarch64_get_mem_s8 (cpu, address)); 10625 10626 if (wb == Post) 10627 address += offset; 10628 10629 if (wb != NoWriteBack) 10630 aarch64_set_reg_u64 (cpu, rn, NO_SP, address); 10631 } 10632 10633 /* 8 bit store scaled. */ 10634 static void 10635 fstrb_abs (sim_cpu *cpu, uint32_t offset) 10636 { 10637 unsigned st = INSTR (4, 0); 10638 unsigned rn = INSTR (9, 5); 10639 10640 aarch64_set_mem_u8 (cpu, 10641 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, 10642 aarch64_get_vec_u8 (cpu, st, 0)); 10643 } 10644 10645 /* 8 bit store scaled or unscaled zero- or 10646 sign-extended 8-bit register offset. */ 10647 static void 10648 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10649 { 10650 unsigned rm = INSTR (20, 16); 10651 unsigned rn = INSTR (9, 5); 10652 unsigned st = INSTR (4, 0); 10653 10654 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10655 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10656 extension); 10657 uint64_t displacement = scaling == Scaled ? extended : 0; 10658 10659 aarch64_set_mem_u8 10660 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0)); 10661 } 10662 10663 /* 16 bit store scaled. */ 10664 static void 10665 fstrh_abs (sim_cpu *cpu, uint32_t offset) 10666 { 10667 unsigned st = INSTR (4, 0); 10668 unsigned rn = INSTR (9, 5); 10669 10670 aarch64_set_mem_u16 10671 (cpu, 10672 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16), 10673 aarch64_get_vec_u16 (cpu, st, 0)); 10674 } 10675 10676 /* 16 bit store scaled or unscaled zero- 10677 or sign-extended 16-bit register offset. 
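  The register offset is extended first and then, when the S bit
     selects Scaled, shifted left by log2 of the transfer size in
     bytes -- here by 1, i.e. (sketch):
     displacement = (scaling == Scaled) ? extended << 1 : extended.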
*/ 10678 static void 10679 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10680 { 10681 unsigned rm = INSTR (20, 16); 10682 unsigned rn = INSTR (9, 5); 10683 unsigned st = INSTR (4, 0); 10684 10685 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10686 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10687 extension); 10688 uint64_t displacement = OPT_SCALE (extended, 16, scaling); 10689 10690 aarch64_set_mem_u16 10691 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0)); 10692 } 10693 10694 /* 32 bit store scaled unsigned 12 bit. */ 10695 static void 10696 fstrs_abs (sim_cpu *cpu, uint32_t offset) 10697 { 10698 unsigned st = INSTR (4, 0); 10699 unsigned rn = INSTR (9, 5); 10700 10701 aarch64_set_mem_u32 10702 (cpu, 10703 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32), 10704 aarch64_get_vec_u32 (cpu, st, 0)); 10705 } 10706 10707 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */ 10708 static void 10709 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) 10710 { 10711 unsigned rn = INSTR (9, 5); 10712 unsigned st = INSTR (4, 0); 10713 10714 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10715 10716 if (wb != Post) 10717 address += offset; 10718 10719 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0)); 10720 10721 if (wb == Post) 10722 address += offset; 10723 10724 if (wb != NoWriteBack) 10725 aarch64_set_reg_u64 (cpu, rn, SP_OK, address); 10726 } 10727 10728 /* 32 bit store scaled or unscaled zero- 10729 or sign-extended 32-bit register offset. */ 10730 static void 10731 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10732 { 10733 unsigned rm = INSTR (20, 16); 10734 unsigned rn = INSTR (9, 5); 10735 unsigned st = INSTR (4, 0); 10736 10737 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10738 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10739 extension); 10740 uint64_t displacement = OPT_SCALE (extended, 32, scaling); 10741 10742 aarch64_set_mem_u32 10743 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0)); 10744 } 10745 10746 /* 64 bit store scaled unsigned 12 bit. */ 10747 static void 10748 fstrd_abs (sim_cpu *cpu, uint32_t offset) 10749 { 10750 unsigned st = INSTR (4, 0); 10751 unsigned rn = INSTR (9, 5); 10752 10753 aarch64_set_mem_u64 10754 (cpu, 10755 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64), 10756 aarch64_get_vec_u64 (cpu, st, 0)); 10757 } 10758 10759 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */ 10760 static void 10761 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) 10762 { 10763 unsigned rn = INSTR (9, 5); 10764 unsigned st = INSTR (4, 0); 10765 10766 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10767 10768 if (wb != Post) 10769 address += offset; 10770 10771 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0)); 10772 10773 if (wb == Post) 10774 address += offset; 10775 10776 if (wb != NoWriteBack) 10777 aarch64_set_reg_u64 (cpu, rn, SP_OK, address); 10778 } 10779 10780 /* 64 bit store scaled or unscaled zero- 10781 or sign-extended 32-bit register offset. 
*/ 10782 static void 10783 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10784 { 10785 unsigned rm = INSTR (20, 16); 10786 unsigned rn = INSTR (9, 5); 10787 unsigned st = INSTR (4, 0); 10788 10789 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10790 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10791 extension); 10792 uint64_t displacement = OPT_SCALE (extended, 64, scaling); 10793 10794 aarch64_set_mem_u64 10795 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0)); 10796 } 10797 10798 /* 128 bit store scaled unsigned 12 bit. */ 10799 static void 10800 fstrq_abs (sim_cpu *cpu, uint32_t offset) 10801 { 10802 FRegister a; 10803 unsigned st = INSTR (4, 0); 10804 unsigned rn = INSTR (9, 5); 10805 uint64_t addr; 10806 10807 aarch64_get_FP_long_double (cpu, st, & a); 10808 10809 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128); 10810 aarch64_set_mem_long_double (cpu, addr, a); 10811 } 10812 10813 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */ 10814 static void 10815 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) 10816 { 10817 FRegister a; 10818 unsigned rn = INSTR (9, 5); 10819 unsigned st = INSTR (4, 0); 10820 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10821 10822 if (wb != Post) 10823 address += offset; 10824 10825 aarch64_get_FP_long_double (cpu, st, & a); 10826 aarch64_set_mem_long_double (cpu, address, a); 10827 10828 if (wb == Post) 10829 address += offset; 10830 10831 if (wb != NoWriteBack) 10832 aarch64_set_reg_u64 (cpu, rn, SP_OK, address); 10833 } 10834 10835 /* 128 bit store scaled or unscaled zero- 10836 or sign-extended 32-bit register offset. */ 10837 static void 10838 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10839 { 10840 unsigned rm = INSTR (20, 16); 10841 unsigned rn = INSTR (9, 5); 10842 unsigned st = INSTR (4, 0); 10843 10844 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); 10845 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), 10846 extension); 10847 uint64_t displacement = OPT_SCALE (extended, 128, scaling); 10848 10849 FRegister a; 10850 10851 aarch64_get_FP_long_double (cpu, st, & a); 10852 aarch64_set_mem_long_double (cpu, address + displacement, a); 10853 } 10854 10855 static void 10856 dexLoadImmediatePrePost (sim_cpu *cpu) 10857 { 10858 /* instr[31,30] = size 10859 instr[29,27] = 111 10860 instr[26] = V 10861 instr[25,24] = 00 10862 instr[23,22] = opc 10863 instr[21] = 0 10864 instr[20,12] = simm9 10865 instr[11] = wb : 0 ==> Post, 1 ==> Pre 10866 instr[10] = 0 10867 instr[9,5] = Rn may be SP. 10868 instr[4,0] = Rt */ 10869 10870 uint32_t V = INSTR (26, 26); 10871 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); 10872 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); 10873 WriteBack wb = INSTR (11, 11); 10874 10875 if (!V) 10876 { 10877 /* GReg operations. 
*/
10878     switch (dispatch)
10879       {
10880       case 0: strb_wb (cpu, imm, wb); return;
10881       case 1: ldrb32_wb (cpu, imm, wb); return;
10882       case 2: ldrsb_wb (cpu, imm, wb); return;
10883       case 3: ldrsb32_wb (cpu, imm, wb); return;
10884       case 4: strh_wb (cpu, imm, wb); return;
10885       case 5: ldrh32_wb (cpu, imm, wb); return;
10886       case 6: ldrsh64_wb (cpu, imm, wb); return;
10887       case 7: ldrsh32_wb (cpu, imm, wb); return;
10888       case 8: str32_wb (cpu, imm, wb); return;
10889       case 9: ldr32_wb (cpu, imm, wb); return;
10890       case 10: ldrsw_wb (cpu, imm, wb); return;
10891       case 12: str_wb (cpu, imm, wb); return;
10892       case 13: ldr_wb (cpu, imm, wb); return;
10893
10894       default:
10895       case 11:
10896       case 14:
10897       case 15:
10898         HALT_UNALLOC;
10899       }
10900     }
10901
10902   /* FReg operations.  */
10903   switch (dispatch)
10904     {
10905     case 2: fstrq_wb (cpu, imm, wb); return;
10906     case 3: fldrq_wb (cpu, imm, wb); return;
10907     case 8: fstrs_wb (cpu, imm, wb); return;
10908     case 9: fldrs_wb (cpu, imm, wb); return;
10909     case 12: fstrd_wb (cpu, imm, wb); return;
10910     case 13: fldrd_wb (cpu, imm, wb); return;
10911
10912     case 0: /* STUR 8 bit FP.  */
10913     case 1: /* LDUR 8 bit FP.  */
10914     case 4: /* STUR 16 bit FP.  */
10915     case 5: /* LDUR 16 bit FP.  */
10916       HALT_NYI;
10917
10918     default:
10919     case 6:
10920     case 7:
10921     case 10:
10922     case 11:
10923     case 14:
10924     case 15:
10925       HALT_UNALLOC;
10926     }
10927 }
10928
10929 static void
10930 dexLoadRegisterOffset (sim_cpu *cpu)
10931 {
10932   /* instr[31,30] = size
10933      instr[29,27] = 111
10934      instr[26] = V
10935      instr[25,24] = 00
10936      instr[23,22] = opc
10937      instr[21] = 1
10938      instr[20,16] = rm
10939      instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10940                              110 ==> SXTW, 111 ==> SXTX,
10941                              ow ==> RESERVED
10942      instr[12] = scaled
10943      instr[11,10] = 10
10944      instr[9,5] = rn
10945      instr[4,0] = rt.  */
10946
10947   uint32_t V = INSTR (26, 26);
10948   uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10949   Scaling scale = INSTR (12, 12);
10950   Extension extensionType = INSTR (15, 13);
10951
10952   /* Check for illegal extension types.  */
10953   if (uimm (extensionType, 1, 1) == 0)
10954     HALT_UNALLOC;
10955
10956   if (extensionType == UXTX || extensionType == SXTX)
10957     extensionType = NoExtension;
10958
10959   if (!V)
10960     {
10961       /* GReg operations.  */
10962       switch (dispatch)
10963         {
10964         case 0: strb_scale_ext (cpu, scale, extensionType); return;
10965         case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10966         case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10967         case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10968         case 4: strh_scale_ext (cpu, scale, extensionType); return;
10969         case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10970         case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10971         case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10972         case 8: str32_scale_ext (cpu, scale, extensionType); return;
10973         case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10974         case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10975         case 12: str_scale_ext (cpu, scale, extensionType); return;
10976         case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10977         case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10978
10979         default:
10980         case 11:
10981         case 15:
10982           HALT_UNALLOC;
10983         }
10984     }
10985
10986   /* FReg operations.  */
10987   switch (dispatch)
10988     {
10989     case 1: /* LDUR 8 bit FP.
*/
10990       HALT_NYI;
10991     case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10992     case 5: /* LDUR 16 bit FP.  */
10993       HALT_NYI;
10994     case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10995     case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10996
10997     case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10998     case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10999     case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11000     case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11001     case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11002
11003     default:
11004     case 6:
11005     case 7:
11006     case 10:
11007     case 11:
11008     case 14:
11009     case 15:
11010       HALT_UNALLOC;
11011     }
11012 }
11013
11014 static void
11015 dexLoadUnsignedImmediate (sim_cpu *cpu)
11016 {
11017   /* instr[29,24] == 111_01
11018      instr[31,30] = size
11019      instr[26] = V
11020      instr[23,22] = opc
11021      instr[21,10] = uimm12 : unsigned immediate offset
11022      instr[9,5] = rn may be SP.
11023      instr[4,0] = rt.  */
11024
11025   uint32_t V = INSTR (26,26);
11026   uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11027   uint32_t imm = INSTR (21, 10);
11028
11029   if (!V)
11030     {
11031       /* GReg operations.  */
11032       switch (dispatch)
11033         {
11034         case 0: strb_abs (cpu, imm); return;
11035         case 1: ldrb32_abs (cpu, imm); return;
11036         case 2: ldrsb_abs (cpu, imm); return;
11037         case 3: ldrsb32_abs (cpu, imm); return;
11038         case 4: strh_abs (cpu, imm); return;
11039         case 5: ldrh32_abs (cpu, imm); return;
11040         case 6: ldrsh_abs (cpu, imm); return;
11041         case 7: ldrsh32_abs (cpu, imm); return;
11042         case 8: str32_abs (cpu, imm); return;
11043         case 9: ldr32_abs (cpu, imm); return;
11044         case 10: ldrsw_abs (cpu, imm); return;
11045         case 12: str_abs (cpu, imm); return;
11046         case 13: ldr_abs (cpu, imm); return;
11047         case 14: prfm_abs (cpu, imm); return;
11048
11049         default:
11050         case 11:
11051         case 15:
11052           HALT_UNALLOC;
11053         }
11054     }
11055
11056   /* FReg operations.  */
11057   switch (dispatch)
11058     {
11059     case 0: fstrb_abs (cpu, imm); return;
11060     case 4: fstrh_abs (cpu, imm); return;
11061     case 8: fstrs_abs (cpu, imm); return;
11062     case 12: fstrd_abs (cpu, imm); return;
11063     case 2: fstrq_abs (cpu, imm); return;
11064
11065     case 1: fldrb_abs (cpu, imm); return;
11066     case 5: fldrh_abs (cpu, imm); return;
11067     case 9: fldrs_abs (cpu, imm); return;
11068     case 13: fldrd_abs (cpu, imm); return;
11069     case 3: fldrq_abs (cpu, imm); return;
11070
11071     default:
11072     case 6:
11073     case 7:
11074     case 10:
11075     case 11:
11076     case 14:
11077     case 15:
11078       HALT_UNALLOC;
11079     }
11080 }
11081
11082 static void
11083 dexLoadExclusive (sim_cpu *cpu)
11084 {
11085   /* assert instr[29:24] = 001000;
11086      instr[31,30] = size
11087      instr[23] = 0 if exclusive
11088      instr[22] = L : 1 if load, 0 if store
11089      instr[21] = 1 if pair
11090      instr[20,16] = Rs
11091      instr[15] = o0 : 1 if ordered
11092      instr[14,10] = Rt2
11093      instr[9,5] = Rn
11094      instr[4,0] = Rt.  */
11095
11096   switch (INSTR (22, 21))
11097     {
11098     case 2: ldxr (cpu); return;
11099     case 0: stxr (cpu); return;
11100     default: HALT_NYI;
11101     }
11102 }
11103
11104 static void
11105 dexLoadOther (sim_cpu *cpu)
11106 {
11107   uint32_t dispatch;
11108
11109   /* instr[29,25] = 111_0
11110      instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11111      instr[21:11,10] is the secondary dispatch.
*/
11112   if (INSTR (24, 24))
11113     {
11114       dexLoadUnsignedImmediate (cpu);
11115       return;
11116     }
11117
11118   dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11119   switch (dispatch)
11120     {
11121     case 0: dexLoadUnscaledImmediate (cpu); return;
11122     case 1: dexLoadImmediatePrePost (cpu); return;
11123     case 3: dexLoadImmediatePrePost (cpu); return;
11124     case 6: dexLoadRegisterOffset (cpu); return;
11125
11126     default:
11127     case 2:
11128     case 4:
11129     case 5:
11130     case 7:
11131       HALT_NYI;
11132     }
11133 }
11134
11135 static void
11136 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11137 {
11138   unsigned rn = INSTR (14, 10);
11139   unsigned rd = INSTR (9, 5);
11140   unsigned rm = INSTR (4, 0);
11141   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11142
11143   if ((rn == rd || rm == rd) && wb != NoWriteBack)
11144     HALT_UNALLOC; /* Writeback base overlapping a data register is unpredictable.  */
11145
11146   offset <<= 2;
11147
11148   if (wb != Post)
11149     address += offset;
11150
11151   aarch64_set_mem_u32 (cpu, address,
11152                        aarch64_get_reg_u32 (cpu, rm, NO_SP));
11153   aarch64_set_mem_u32 (cpu, address + 4,
11154                        aarch64_get_reg_u32 (cpu, rn, NO_SP));
11155
11156   if (wb == Post)
11157     address += offset;
11158
11159   if (wb != NoWriteBack)
11160     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11161 }
11162
11163 static void
11164 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11165 {
11166   unsigned rn = INSTR (14, 10);
11167   unsigned rd = INSTR (9, 5);
11168   unsigned rm = INSTR (4, 0);
11169   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11170
11171   if ((rn == rd || rm == rd) && wb != NoWriteBack)
11172     HALT_UNALLOC; /* Writeback base overlapping a data register is unpredictable.  */
11173
11174   offset <<= 3;
11175
11176   if (wb != Post)
11177     address += offset;
11178
11179   aarch64_set_mem_u64 (cpu, address,
11180                        aarch64_get_reg_u64 (cpu, rm, NO_SP));
11181   aarch64_set_mem_u64 (cpu, address + 8,
11182                        aarch64_get_reg_u64 (cpu, rn, NO_SP));
11183
11184   if (wb == Post)
11185     address += offset;
11186
11187   if (wb != NoWriteBack)
11188     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11189 }
11190
11191 static void
11192 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11193 {
11194   unsigned rn = INSTR (14, 10);
11195   unsigned rd = INSTR (9, 5);
11196   unsigned rm = INSTR (4, 0);
11197   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11198
11199   /* Treat this as unalloc to make sure we don't do it.  */
11200   if (rn == rm)
11201     HALT_UNALLOC;
11202
11203   offset <<= 2;
11204
11205   if (wb != Post)
11206     address += offset;
11207
11208   aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11209   aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11210
11211   if (wb == Post)
11212     address += offset;
11213
11214   if (wb != NoWriteBack)
11215     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11216 }
11217
11218 static void
11219 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11220 {
11221   unsigned rn = INSTR (14, 10);
11222   unsigned rd = INSTR (9, 5);
11223   unsigned rm = INSTR (4, 0);
11224   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11225
11226   /* Treat this as unalloc to make sure we don't do it.
*/ 11227 if (rn == rm) 11228 HALT_UNALLOC; 11229 11230 offset <<= 2; 11231 11232 if (wb != Post) 11233 address += offset; 11234 11235 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address)); 11236 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4)); 11237 11238 if (wb == Post) 11239 address += offset; 11240 11241 if (wb != NoWriteBack) 11242 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11243 } 11244 11245 static void 11246 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) 11247 { 11248 unsigned rn = INSTR (14, 10); 11249 unsigned rd = INSTR (9, 5); 11250 unsigned rm = INSTR (4, 0); 11251 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11252 11253 /* Treat this as unalloc to make sure we don't do it. */ 11254 if (rn == rm) 11255 HALT_UNALLOC; 11256 11257 offset <<= 3; 11258 11259 if (wb != Post) 11260 address += offset; 11261 11262 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address)); 11263 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8)); 11264 11265 if (wb == Post) 11266 address += offset; 11267 11268 if (wb != NoWriteBack) 11269 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11270 } 11271 11272 static void 11273 dex_load_store_pair_gr (sim_cpu *cpu) 11274 { 11275 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit) 11276 instr[29,25] = instruction encoding: 101_0 11277 instr[26] = V : 1 if fp 0 if gp 11278 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre) 11279 instr[22] = load/store (1=> load) 11280 instr[21,15] = signed, scaled, offset 11281 instr[14,10] = Rn 11282 instr[ 9, 5] = Rd 11283 instr[ 4, 0] = Rm. */ 11284 11285 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22)); 11286 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15); 11287 11288 switch (dispatch) 11289 { 11290 case 2: store_pair_u32 (cpu, offset, Post); return; 11291 case 3: load_pair_u32 (cpu, offset, Post); return; 11292 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return; 11293 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return; 11294 case 6: store_pair_u32 (cpu, offset, Pre); return; 11295 case 7: load_pair_u32 (cpu, offset, Pre); return; 11296 11297 case 11: load_pair_s32 (cpu, offset, Post); return; 11298 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return; 11299 case 15: load_pair_s32 (cpu, offset, Pre); return; 11300 11301 case 18: store_pair_u64 (cpu, offset, Post); return; 11302 case 19: load_pair_u64 (cpu, offset, Post); return; 11303 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return; 11304 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return; 11305 case 22: store_pair_u64 (cpu, offset, Pre); return; 11306 case 23: load_pair_u64 (cpu, offset, Pre); return; 11307 11308 default: 11309 HALT_UNALLOC; 11310 } 11311 } 11312 11313 static void 11314 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) 11315 { 11316 unsigned rn = INSTR (14, 10); 11317 unsigned rd = INSTR (9, 5); 11318 unsigned rm = INSTR (4, 0); 11319 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11320 11321 offset <<= 2; 11322 11323 if (wb != Post) 11324 address += offset; 11325 11326 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0)); 11327 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0)); 11328 11329 if (wb == Post) 11330 address += offset; 11331 11332 if (wb != NoWriteBack) 11333 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11334 } 11335 11336 static void 11337 store_pair_double (sim_cpu *cpu, 
int32_t offset, WriteBack wb) 11338 { 11339 unsigned rn = INSTR (14, 10); 11340 unsigned rd = INSTR (9, 5); 11341 unsigned rm = INSTR (4, 0); 11342 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11343 11344 offset <<= 3; 11345 11346 if (wb != Post) 11347 address += offset; 11348 11349 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0)); 11350 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0)); 11351 11352 if (wb == Post) 11353 address += offset; 11354 11355 if (wb != NoWriteBack) 11356 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11357 } 11358 11359 static void 11360 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) 11361 { 11362 FRegister a; 11363 unsigned rn = INSTR (14, 10); 11364 unsigned rd = INSTR (9, 5); 11365 unsigned rm = INSTR (4, 0); 11366 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11367 11368 offset <<= 4; 11369 11370 if (wb != Post) 11371 address += offset; 11372 11373 aarch64_get_FP_long_double (cpu, rm, & a); 11374 aarch64_set_mem_long_double (cpu, address, a); 11375 aarch64_get_FP_long_double (cpu, rn, & a); 11376 aarch64_set_mem_long_double (cpu, address + 16, a); 11377 11378 if (wb == Post) 11379 address += offset; 11380 11381 if (wb != NoWriteBack) 11382 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11383 } 11384 11385 static void 11386 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) 11387 { 11388 unsigned rn = INSTR (14, 10); 11389 unsigned rd = INSTR (9, 5); 11390 unsigned rm = INSTR (4, 0); 11391 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11392 11393 if (rm == rn) 11394 HALT_UNALLOC; 11395 11396 offset <<= 2; 11397 11398 if (wb != Post) 11399 address += offset; 11400 11401 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address)); 11402 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4)); 11403 11404 if (wb == Post) 11405 address += offset; 11406 11407 if (wb != NoWriteBack) 11408 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11409 } 11410 11411 static void 11412 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) 11413 { 11414 unsigned rn = INSTR (14, 10); 11415 unsigned rd = INSTR (9, 5); 11416 unsigned rm = INSTR (4, 0); 11417 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11418 11419 if (rm == rn) 11420 HALT_UNALLOC; 11421 11422 offset <<= 3; 11423 11424 if (wb != Post) 11425 address += offset; 11426 11427 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address)); 11428 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8)); 11429 11430 if (wb == Post) 11431 address += offset; 11432 11433 if (wb != NoWriteBack) 11434 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11435 } 11436 11437 static void 11438 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) 11439 { 11440 FRegister a; 11441 unsigned rn = INSTR (14, 10); 11442 unsigned rd = INSTR (9, 5); 11443 unsigned rm = INSTR (4, 0); 11444 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); 11445 11446 if (rm == rn) 11447 HALT_UNALLOC; 11448 11449 offset <<= 4; 11450 11451 if (wb != Post) 11452 address += offset; 11453 11454 aarch64_get_mem_long_double (cpu, address, & a); 11455 aarch64_set_FP_long_double (cpu, rm, a); 11456 aarch64_get_mem_long_double (cpu, address + 16, & a); 11457 aarch64_set_FP_long_double (cpu, rn, a); 11458 11459 if (wb == Post) 11460 address += offset; 11461 11462 if (wb != NoWriteBack) 11463 aarch64_set_reg_u64 (cpu, rd, SP_OK, address); 11464 } 11465 11466 static void 11467 
dex_load_store_pair_fp (sim_cpu *cpu)
11468 {
11469   /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11470      instr[29,25] = instruction encoding
11471      instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11472      instr[22] = load/store (1=> load)
11473      instr[21,15] = signed, scaled, offset
11474      instr[14,10] = Rn
11475      instr[ 9, 5] = Rd
11476      instr[ 4, 0] = Rm  */
11477
11478   uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11479   int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11480
11481   switch (dispatch)
11482     {
11483     case 2: store_pair_float (cpu, offset, Post); return;
11484     case 3: load_pair_float (cpu, offset, Post); return;
11485     case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11486     case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11487     case 6: store_pair_float (cpu, offset, Pre); return;
11488     case 7: load_pair_float (cpu, offset, Pre); return;
11489
11490     case 10: store_pair_double (cpu, offset, Post); return;
11491     case 11: load_pair_double (cpu, offset, Post); return;
11492     case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11493     case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11494     case 14: store_pair_double (cpu, offset, Pre); return;
11495     case 15: load_pair_double (cpu, offset, Pre); return;
11496
11497     case 18: store_pair_long_double (cpu, offset, Post); return;
11498     case 19: load_pair_long_double (cpu, offset, Post); return;
11499     case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11500     case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11501     case 22: store_pair_long_double (cpu, offset, Pre); return;
11502     case 23: load_pair_long_double (cpu, offset, Pre); return;
11503
11504     default:
11505       HALT_UNALLOC;
11506     }
11507 }
11508
11509 static inline unsigned
11510 vec_reg (unsigned v, unsigned o)
11511 {
11512   return (v + o) & 0x1F; /* Vector register numbers wrap mod 32.  */
11513 }
11514
11515 /* Load multiple N-element structures to M consecutive registers.  */
11516 static void
11517 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11518 {
11519   int all = INSTR (30, 30);
11520   unsigned size = INSTR (11, 10);
11521   unsigned vd = INSTR (4, 0);
11522   unsigned rpt = (N == M) ? 1 : M;
11523   unsigned selem = N;
11524   unsigned i, j, k;
11525
11526   switch (size)
11527     {
11528     case 0: /* 8-bit operations.  */
11529       for (i = 0; i < rpt; i++)
11530         for (j = 0; j < (8 + (8 * all)); j++)
11531           for (k = 0; k < selem; k++)
11532             {
11533               aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11534                                   aarch64_get_mem_u8 (cpu, address));
11535               address += 1;
11536             }
11537       return;
11538
11539     case 1: /* 16-bit operations.  */
11540       for (i = 0; i < rpt; i++)
11541         for (j = 0; j < (4 + (4 * all)); j++)
11542           for (k = 0; k < selem; k++)
11543             {
11544               aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11545                                    aarch64_get_mem_u16 (cpu, address));
11546               address += 2;
11547             }
11548       return;
11549
11550     case 2: /* 32-bit operations.  */
11551       for (i = 0; i < rpt; i++)
11552         for (j = 0; j < (2 + (2 * all)); j++)
11553           for (k = 0; k < selem; k++)
11554             {
11555               aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11556                                    aarch64_get_mem_u32 (cpu, address));
11557               address += 4;
11558             }
11559       return;
11560
11561     case 3: /* 64-bit operations.
*/
11562       for (i = 0; i < rpt; i++)
11563         for (j = 0; j < (1 + all); j++)
11564           for (k = 0; k < selem; k++)
11565             {
11566               aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11567                                    aarch64_get_mem_u64 (cpu, address));
11568               address += 8;
11569             }
11570       return;
11571     }
11572 }
11573
11574 /* Load multiple 4-element structures into four consecutive registers.  */
11575 static void
11576 LD4 (sim_cpu *cpu, uint64_t address)
11577 {
11578   vec_load (cpu, address, 4, 4);
11579 }
11580
11581 /* Load multiple 3-element structures into three consecutive registers.  */
11582 static void
11583 LD3 (sim_cpu *cpu, uint64_t address)
11584 {
11585   vec_load (cpu, address, 3, 3);
11586 }
11587
11588 /* Load multiple 2-element structures into two consecutive registers.  */
11589 static void
11590 LD2 (sim_cpu *cpu, uint64_t address)
11591 {
11592   vec_load (cpu, address, 2, 2);
11593 }
11594
11595 /* Load multiple 1-element structures into one register.  */
11596 static void
11597 LD1_1 (sim_cpu *cpu, uint64_t address)
11598 {
11599   vec_load (cpu, address, 1, 1);
11600 }
11601
11602 /* Load multiple 1-element structures into two registers.  */
11603 static void
11604 LD1_2 (sim_cpu *cpu, uint64_t address)
11605 {
11606   vec_load (cpu, address, 1, 2);
11607 }
11608
11609 /* Load multiple 1-element structures into three registers.  */
11610 static void
11611 LD1_3 (sim_cpu *cpu, uint64_t address)
11612 {
11613   vec_load (cpu, address, 1, 3);
11614 }
11615
11616 /* Load multiple 1-element structures into four registers.  */
11617 static void
11618 LD1_4 (sim_cpu *cpu, uint64_t address)
11619 {
11620   vec_load (cpu, address, 1, 4);
11621 }
11622
11623 /* Store multiple N-element structures from M consecutive registers.  */
11624 static void
11625 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11626 {
11627   int all = INSTR (30, 30);
11628   unsigned size = INSTR (11, 10);
11629   unsigned vd = INSTR (4, 0);
11630   unsigned rpt = (N == M) ? 1 : M;
11631   unsigned selem = N;
11632   unsigned i, j, k;
11633
11634   switch (size)
11635     {
11636     case 0: /* 8-bit operations.  */
11637       for (i = 0; i < rpt; i++)
11638         for (j = 0; j < (8 + (8 * all)); j++)
11639           for (k = 0; k < selem; k++)
11640             {
11641               aarch64_set_mem_u8
11642                 (cpu, address,
11643                  aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11644               address += 1;
11645             }
11646       return;
11647
11648     case 1: /* 16-bit operations.  */
11649       for (i = 0; i < rpt; i++)
11650         for (j = 0; j < (4 + (4 * all)); j++)
11651           for (k = 0; k < selem; k++)
11652             {
11653               aarch64_set_mem_u16
11654                 (cpu, address,
11655                  aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11656               address += 2;
11657             }
11658       return;
11659
11660     case 2: /* 32-bit operations.  */
11661       for (i = 0; i < rpt; i++)
11662         for (j = 0; j < (2 + (2 * all)); j++)
11663           for (k = 0; k < selem; k++)
11664             {
11665               aarch64_set_mem_u32
11666                 (cpu, address,
11667                  aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11668               address += 4;
11669             }
11670       return;
11671
11672     case 3: /* 64-bit operations.  */
11673       for (i = 0; i < rpt; i++)
11674         for (j = 0; j < (1 + all); j++)
11675           for (k = 0; k < selem; k++)
11676             {
11677               aarch64_set_mem_u64
11678                 (cpu, address,
11679                  aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11680               address += 8;
11681             }
11682       return;
11683     }
11684 }
11685
11686 /* Store multiple 4-element structures from four consecutive registers.
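   (The interleaving done by vec_load and vec_store above is sketched
   below.)  */

/* Editor's sketch, not part of the simulator: for the structure forms
   (LD2/LD3/LD4, where rpt == 1 and selem == N), the loops above walk
   memory sequentially, so the m-th element transferred belongs to
   register vd + (m % selem), wrapping mod 32, at lane m / selem --
   which is exactly what de-interleaves an N-element structure.  The
   names below are the editor's own.  */

static void
example_ldn_target (unsigned vd, unsigned selem, unsigned m,
                    unsigned *reg, unsigned *lane)
{
  *reg = (vd + m % selem) & 0x1F; /* Register numbers wrap mod 32.  */
  *lane = m / selem;
}

/* ST4, continued: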
*/ 11687 static void 11688 ST4 (sim_cpu *cpu, uint64_t address) 11689 { 11690 vec_store (cpu, address, 4, 4); 11691 } 11692 11693 /* Store multiple 3-element structures from three consecutive registers. */ 11694 static void 11695 ST3 (sim_cpu *cpu, uint64_t address) 11696 { 11697 vec_store (cpu, address, 3, 3); 11698 } 11699 11700 /* Store multiple 2-element structures from two consecutive registers. */ 11701 static void 11702 ST2 (sim_cpu *cpu, uint64_t address) 11703 { 11704 vec_store (cpu, address, 2, 2); 11705 } 11706 11707 /* Store multiple 1-element structures from one register. */ 11708 static void 11709 ST1_1 (sim_cpu *cpu, uint64_t address) 11710 { 11711 vec_store (cpu, address, 1, 1); 11712 } 11713 11714 /* Store multiple 1-element structures from two registers. */ 11715 static void 11716 ST1_2 (sim_cpu *cpu, uint64_t address) 11717 { 11718 vec_store (cpu, address, 1, 2); 11719 } 11720 11721 /* Store multiple 1-element structures from three registers. */ 11722 static void 11723 ST1_3 (sim_cpu *cpu, uint64_t address) 11724 { 11725 vec_store (cpu, address, 1, 3); 11726 } 11727 11728 /* Store multiple 1-element structures from four registers. */ 11729 static void 11730 ST1_4 (sim_cpu *cpu, uint64_t address) 11731 { 11732 vec_store (cpu, address, 1, 4); 11733 } 11734 11735 #define LDn_STn_SINGLE_LANE_AND_SIZE() \ 11736 do \ 11737 { \ 11738 switch (INSTR (15, 14)) \ 11739 { \ 11740 case 0: \ 11741 lane = (full << 3) | (s << 2) | size; \ 11742 size = 0; \ 11743 break; \ 11744 \ 11745 case 1: \ 11746 if ((size & 1) == 1) \ 11747 HALT_UNALLOC; \ 11748 lane = (full << 2) | (s << 1) | (size >> 1); \ 11749 size = 1; \ 11750 break; \ 11751 \ 11752 case 2: \ 11753 if ((size & 2) == 2) \ 11754 HALT_UNALLOC; \ 11755 \ 11756 if ((size & 1) == 0) \ 11757 { \ 11758 lane = (full << 1) | s; \ 11759 size = 2; \ 11760 } \ 11761 else \ 11762 { \ 11763 if (s) \ 11764 HALT_UNALLOC; \ 11765 lane = full; \ 11766 size = 3; \ 11767 } \ 11768 break; \ 11769 \ 11770 default: \ 11771 HALT_UNALLOC; \ 11772 } \ 11773 } \ 11774 while (0) 11775 11776 /* Load single structure into one lane of N registers. */ 11777 static void 11778 do_vec_LDn_single (sim_cpu *cpu, uint64_t address) 11779 { 11780 /* instr[31] = 0 11781 instr[30] = element selector 0=>half, 1=>all elements 11782 instr[29,24] = 00 1101 11783 instr[23] = 0=>simple, 1=>post 11784 instr[22] = 1 11785 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1) 11786 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), 11787 11111 (immediate post inc) 11788 instr[15,13] = opcode 11789 instr[12] = S, used for lane number 11790 instr[11,10] = size, also used for lane number 11791 instr[9,5] = address 11792 instr[4,0] = Vd */ 11793 11794 unsigned full = INSTR (30, 30); 11795 unsigned vd = INSTR (4, 0); 11796 unsigned size = INSTR (11, 10); 11797 unsigned s = INSTR (12, 12); 11798 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; 11799 int lane = 0; 11800 int i; 11801 11802 NYI_assert (29, 24, 0x0D); 11803 NYI_assert (22, 22, 1); 11804 11805 /* Compute the lane number first (using size), and then compute size. 
*/
11806   LDn_STn_SINGLE_LANE_AND_SIZE ();
11807
11808   for (i = 0; i < nregs; i++)
11809     switch (size)
11810       {
11811       case 0:
11812         {
11813           uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11814           aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11815           break;
11816         }
11817
11818       case 1:
11819         {
11820           uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11821           aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11822           break;
11823         }
11824
11825       case 2:
11826         {
11827           uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11828           aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11829           break;
11830         }
11831
11832       case 3:
11833         {
11834           uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11835           aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11836           break;
11837         }
11838       }
11839 }
11840
11841 /* Store single structure from one lane of N registers.  */
11842 static void
11843 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11844 {
11845   /* instr[31] = 0
11846      instr[30] = element selector 0=>half, 1=>all elements
11847      instr[29,24] = 00 1101
11848      instr[23] = 0=>simple, 1=>post
11849      instr[22] = 0
11850      instr[21] = width: ST1-or-ST3 (0) / ST2-or-ST4 (1)
11851      instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11852                     11111 (immediate post inc)
11853      instr[15,13] = opcode
11854      instr[12] = S, used for lane number
11855      instr[11,10] = size, also used for lane number
11856      instr[9,5] = address
11857      instr[4,0] = Vd  */
11858
11859   unsigned full = INSTR (30, 30);
11860   unsigned vd = INSTR (4, 0);
11861   unsigned size = INSTR (11, 10);
11862   unsigned s = INSTR (12, 12);
11863   int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11864   int lane = 0;
11865   int i;
11866
11867   NYI_assert (29, 24, 0x0D);
11868   NYI_assert (22, 22, 0);
11869
11870   /* Compute the lane number first (using size), and then compute size.  */
11871   LDn_STn_SINGLE_LANE_AND_SIZE ();
11872
11873   for (i = 0; i < nregs; i++)
11874     switch (size)
11875       {
11876       case 0:
11877         {
11878           uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11879           aarch64_set_mem_u8 (cpu, address + i, val);
11880           break;
11881         }
11882
11883       case 1:
11884         {
11885           uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11886           aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11887           break;
11888         }
11889
11890       case 2:
11891         {
11892           uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11893           aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11894           break;
11895         }
11896
11897       case 3:
11898         {
11899           uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11900           aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11901           break;
11902         }
11903       }
11904 }
11905
11906 /* Load single structure into all lanes of N registers.
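   (First, a sketch of the lane and size packing decoded by the two
   functions above.)  */

/* Editor's sketch, not part of the simulator: the single-structure
   forms pack the lane number into Q:S:size, which is what the
   LDn_STn_SINGLE_LANE_AND_SIZE macro above unpacks.  This function
   returns -1 for the encodings the macro treats as unallocated; the
   names are the editor's own.  */

static int
example_single_lane (unsigned opc15_14, unsigned q, unsigned s,
                     unsigned size, unsigned *lane, unsigned *esize_log2)
{
  switch (opc15_14)
    {
    case 0: /* Bytes: lane is Q:S:size, 0..15.  */
      *lane = (q << 3) | (s << 2) | size;
      *esize_log2 = 0;
      return 0;

    case 1: /* Halves: size<0> must be zero.  */
      if (size & 1)
        return -1;
      *lane = (q << 2) | (s << 1) | (size >> 1);
      *esize_log2 = 1;
      return 0;

    case 2:
      if (size == 0) /* Words: lane is Q:S, 0..3.  */
        {
          *lane = (q << 1) | s;
          *esize_log2 = 2;
          return 0;
        }
      if (size == 1 && s == 0) /* Doublewords: lane is Q.  */
        {
          *lane = q;
          *esize_log2 = 3;
          return 0;
        }
      return -1;

    default:
      return -1;
    }
}

/* Load single structure into all lanes, continued: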
*/ 11907 static void 11908 do_vec_LDnR (sim_cpu *cpu, uint64_t address) 11909 { 11910 /* instr[31] = 0 11911 instr[30] = element selector 0=>half, 1=>all elements 11912 instr[29,24] = 00 1101 11913 instr[23] = 0=>simple, 1=>post 11914 instr[22] = 1 11915 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1) 11916 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), 11917 11111 (immediate post inc) 11918 instr[15,14] = 11 11919 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1) 11920 instr[12] = 0 11921 instr[11,10] = element size 00=> byte(b), 01=> half(h), 11922 10=> word(s), 11=> double(d) 11923 instr[9,5] = address 11924 instr[4,0] = Vd */ 11925 11926 unsigned full = INSTR (30, 30); 11927 unsigned vd = INSTR (4, 0); 11928 unsigned size = INSTR (11, 10); 11929 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; 11930 int i, n; 11931 11932 NYI_assert (29, 24, 0x0D); 11933 NYI_assert (22, 22, 1); 11934 NYI_assert (15, 14, 3); 11935 NYI_assert (12, 12, 0); 11936 11937 for (n = 0; n < nregs; n++) 11938 switch (size) 11939 { 11940 case 0: 11941 { 11942 uint8_t val = aarch64_get_mem_u8 (cpu, address + n); 11943 for (i = 0; i < (full ? 16 : 8); i++) 11944 aarch64_set_vec_u8 (cpu, vd + n, i, val); 11945 break; 11946 } 11947 11948 case 1: 11949 { 11950 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2)); 11951 for (i = 0; i < (full ? 8 : 4); i++) 11952 aarch64_set_vec_u16 (cpu, vd + n, i, val); 11953 break; 11954 } 11955 11956 case 2: 11957 { 11958 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4)); 11959 for (i = 0; i < (full ? 4 : 2); i++) 11960 aarch64_set_vec_u32 (cpu, vd + n, i, val); 11961 break; 11962 } 11963 11964 case 3: 11965 { 11966 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8)); 11967 for (i = 0; i < (full ? 2 : 1); i++) 11968 aarch64_set_vec_u64 (cpu, vd + n, i, val); 11969 break; 11970 } 11971 11972 default: 11973 HALT_UNALLOC; 11974 } 11975 } 11976 11977 static void 11978 do_vec_load_store (sim_cpu *cpu) 11979 { 11980 /* {LD|ST}<N> {Vd..Vd+N}, vaddr 11981 11982 instr[31] = 0 11983 instr[30] = element selector 0=>half, 1=>all elements 11984 instr[29,25] = 00110 11985 instr[24] = 0=>multiple struct, 1=>single struct 11986 instr[23] = 0=>simple, 1=>post 11987 instr[22] = 0=>store, 1=>load 11988 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR) 11989 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP), 11990 11111 (immediate post inc) 11991 instr[15,12] = elements and destinations. 
eg for load:
11992         0000=>LD4 => load multiple 4-element to
11993         four consecutive registers
11994         0100=>LD3 => load multiple 3-element to
11995         three consecutive registers
11996         1000=>LD2 => load multiple 2-element to
11997         two consecutive registers
11998         0010=>LD1 => load multiple 1-element to
11999         four consecutive registers
12000         0110=>LD1 => load multiple 1-element to
12001         three consecutive registers
12002         1010=>LD1 => load multiple 1-element to
12003         two consecutive registers
12004         0111=>LD1 => load multiple 1-element to
12005         one register
12006         1100=>LD1R,LD2R
12007         1110=>LD3R,LD4R
12008      instr[11,10] = element size 00=> byte(b), 01=> half(h),
12009                     10=> word(s), 11=> double(d)
12010      instr[9,5] = Vn, can be SP
12011      instr[4,0] = Vd  */
12012
12013   int single;
12014   int post;
12015   int load;
12016   unsigned vn;
12017   uint64_t address;
12018   int type;
12019
12020   if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12021     HALT_NYI;
12022
12023   single = INSTR (24, 24);
12024   post = INSTR (23, 23);
12025   load = INSTR (22, 22);
12026   type = INSTR (15, 12);
12027   vn = INSTR (9, 5);
12028   address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12029
12030   if (! single && INSTR (21, 21) != 0)
12031     HALT_UNALLOC;
12032
12033   if (post)
12034     {
12035       unsigned vm = INSTR (20, 16);
12036
12037       if (vm == R31)
12038         {
12039           unsigned sizeof_operation;
12040
12041           if (single)
12042             {
12043               if ((type >= 0) && (type <= 11))
12044                 {
12045                   int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12046                   switch (INSTR (15, 14))
12047                     {
12048                     case 0:
12049                       sizeof_operation = nregs * 1;
12050                       break;
12051                     case 1:
12052                       sizeof_operation = nregs * 2;
12053                       break;
12054                     case 2:
12055                       if (INSTR (10, 10) == 0)
12056                         sizeof_operation = nregs * 4;
12057                       else
12058                         sizeof_operation = nregs * 8;
12059                       break;
12060                     default:
12061                       HALT_UNALLOC;
12062                     }
12063                 }
12064               else if (type == 0xC)
12065                 {
12066                   sizeof_operation = INSTR (21, 21) ? 2 : 1;
12067                   sizeof_operation <<= INSTR (11, 10);
12068                 }
12069               else if (type == 0xE)
12070                 {
12071                   sizeof_operation = INSTR (21, 21) ? 4 : 3;
12072                   sizeof_operation <<= INSTR (11, 10);
12073                 }
12074               else
12075                 HALT_UNALLOC;
12076             }
12077           else
12078             {
12079               switch (type)
12080                 {
12081                 case 0: sizeof_operation = 32; break;
12082                 case 4: sizeof_operation = 24; break;
12083                 case 8: sizeof_operation = 16; break;
12084
12085                 case 7:
12086                   /* One register, immediate offset variant.  */
12087                   sizeof_operation = 8;
12088                   break;
12089
12090                 case 10:
12091                   /* Two registers, immediate offset variant.  */
12092                   sizeof_operation = 16;
12093                   break;
12094
12095                 case 6:
12096                   /* Three registers, immediate offset variant.  */
12097                   sizeof_operation = 24;
12098                   break;
12099
12100                 case 2:
12101                   /* Four registers, immediate offset variant.  */
12102                   sizeof_operation = 32;
12103                   break;
12104
12105                 default:
12106                   HALT_UNALLOC;
12107                 }
12108
12109               if (INSTR (30, 30))
12110                 sizeof_operation *= 2;
12111             }
12112
12113           aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12114         }
12115       else
12116         aarch64_set_reg_u64 (cpu, vn, SP_OK,
12117                              address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12118     }
12119   else
12120     {
12121       NYI_assert (20, 16, 0);
12122     }
12123
12124   if (single)
12125     {
12126       if (load)
12127         {
12128           if ((type >= 0) && (type <= 11))
12129             do_vec_LDn_single (cpu, address);
12130           else if ((type == 0xC) || (type == 0xE))
12131             do_vec_LDnR (cpu, address);
12132           else
12133             HALT_UNALLOC;
12134           return;
12135         }
12136
12137       /* Stores.
*/ 12138 if ((type >= 0) && (type <= 11)) 12139 { 12140 do_vec_STn_single (cpu, address); 12141 return; 12142 } 12143 12144 HALT_UNALLOC; 12145 } 12146 12147 if (load) 12148 { 12149 switch (type) 12150 { 12151 case 0: LD4 (cpu, address); return; 12152 case 4: LD3 (cpu, address); return; 12153 case 8: LD2 (cpu, address); return; 12154 case 2: LD1_4 (cpu, address); return; 12155 case 6: LD1_3 (cpu, address); return; 12156 case 10: LD1_2 (cpu, address); return; 12157 case 7: LD1_1 (cpu, address); return; 12158 12159 default: 12160 HALT_UNALLOC; 12161 } 12162 } 12163 12164 /* Stores. */ 12165 switch (type) 12166 { 12167 case 0: ST4 (cpu, address); return; 12168 case 4: ST3 (cpu, address); return; 12169 case 8: ST2 (cpu, address); return; 12170 case 2: ST1_4 (cpu, address); return; 12171 case 6: ST1_3 (cpu, address); return; 12172 case 10: ST1_2 (cpu, address); return; 12173 case 7: ST1_1 (cpu, address); return; 12174 default: 12175 HALT_UNALLOC; 12176 } 12177 } 12178 12179 static void 12180 dexLdSt (sim_cpu *cpu) 12181 { 12182 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); 12183 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 || 12184 group == GROUP_LDST_1100 || group == GROUP_LDST_1110 12185 bits [29,28:26] of a LS are the secondary dispatch vector. */ 12186 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu)); 12187 12188 switch (group2) 12189 { 12190 case LS_EXCL_000: 12191 dexLoadExclusive (cpu); return; 12192 12193 case LS_LIT_010: 12194 case LS_LIT_011: 12195 dexLoadLiteral (cpu); return; 12196 12197 case LS_OTHER_110: 12198 case LS_OTHER_111: 12199 dexLoadOther (cpu); return; 12200 12201 case LS_ADVSIMD_001: 12202 do_vec_load_store (cpu); return; 12203 12204 case LS_PAIR_100: 12205 dex_load_store_pair_gr (cpu); return; 12206 12207 case LS_PAIR_101: 12208 dex_load_store_pair_fp (cpu); return; 12209 12210 default: 12211 /* Should never reach here. */ 12212 HALT_NYI; 12213 } 12214 } 12215 12216 /* Specific decode and execute for group Data Processing Register. */ 12217 12218 static void 12219 dexLogicalShiftedRegister (sim_cpu *cpu) 12220 { 12221 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit 12222 instr[30,29] = op 12223 instr[28:24] = 01010 12224 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR 12225 instr[21] = N 12226 instr[20,16] = Rm 12227 instr[15,10] = count : must be 0xxxxx for 32 bit 12228 instr[9,5] = Rn 12229 instr[4,0] = Rd */ 12230 12231 uint32_t size = INSTR (31, 31); 12232 Shift shiftType = INSTR (23, 22); 12233 uint32_t count = INSTR (15, 10); 12234 12235 /* 32 bit operations must have count[5] = 0. 12236 or else we have an UNALLOC. */ 12237 if (size == 0 && uimm (count, 5, 5)) 12238 HALT_UNALLOC; 12239 12240 /* Dispatch on size:op:N. 
*/ 12241 switch ((INSTR (31, 29) << 1) | INSTR (21, 21)) 12242 { 12243 case 0: and32_shift (cpu, shiftType, count); return; 12244 case 1: bic32_shift (cpu, shiftType, count); return; 12245 case 2: orr32_shift (cpu, shiftType, count); return; 12246 case 3: orn32_shift (cpu, shiftType, count); return; 12247 case 4: eor32_shift (cpu, shiftType, count); return; 12248 case 5: eon32_shift (cpu, shiftType, count); return; 12249 case 6: ands32_shift (cpu, shiftType, count); return; 12250 case 7: bics32_shift (cpu, shiftType, count); return; 12251 case 8: and64_shift (cpu, shiftType, count); return; 12252 case 9: bic64_shift (cpu, shiftType, count); return; 12253 case 10:orr64_shift (cpu, shiftType, count); return; 12254 case 11:orn64_shift (cpu, shiftType, count); return; 12255 case 12:eor64_shift (cpu, shiftType, count); return; 12256 case 13:eon64_shift (cpu, shiftType, count); return; 12257 case 14:ands64_shift (cpu, shiftType, count); return; 12258 case 15:bics64_shift (cpu, shiftType, count); return; 12259 } 12260 } 12261 12262 /* 32 bit conditional select. */ 12263 static void 12264 csel32 (sim_cpu *cpu, CondCode cc) 12265 { 12266 unsigned rm = INSTR (20, 16); 12267 unsigned rn = INSTR (9, 5); 12268 unsigned rd = INSTR (4, 0); 12269 12270 aarch64_set_reg_u64 (cpu, rd, NO_SP, 12271 testConditionCode (cpu, cc) 12272 ? aarch64_get_reg_u32 (cpu, rn, NO_SP) 12273 : aarch64_get_reg_u32 (cpu, rm, NO_SP)); 12274 } 12275 12276 /* 64 bit conditional select. */ 12277 static void 12278 csel64 (sim_cpu *cpu, CondCode cc) 12279 { 12280 unsigned rm = INSTR (20, 16); 12281 unsigned rn = INSTR (9, 5); 12282 unsigned rd = INSTR (4, 0); 12283 12284 aarch64_set_reg_u64 (cpu, rd, NO_SP, 12285 testConditionCode (cpu, cc) 12286 ? aarch64_get_reg_u64 (cpu, rn, NO_SP) 12287 : aarch64_get_reg_u64 (cpu, rm, NO_SP)); 12288 } 12289 12290 /* 32 bit conditional increment. */ 12291 static void 12292 csinc32 (sim_cpu *cpu, CondCode cc) 12293 { 12294 unsigned rm = INSTR (20, 16); 12295 unsigned rn = INSTR (9, 5); 12296 unsigned rd = INSTR (4, 0); 12297 12298 aarch64_set_reg_u64 (cpu, rd, NO_SP, 12299 testConditionCode (cpu, cc) 12300 ? aarch64_get_reg_u32 (cpu, rn, NO_SP) 12301 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1); 12302 } 12303 12304 /* 64 bit conditional increment. */ 12305 static void 12306 csinc64 (sim_cpu *cpu, CondCode cc) 12307 { 12308 unsigned rm = INSTR (20, 16); 12309 unsigned rn = INSTR (9, 5); 12310 unsigned rd = INSTR (4, 0); 12311 12312 aarch64_set_reg_u64 (cpu, rd, NO_SP, 12313 testConditionCode (cpu, cc) 12314 ? aarch64_get_reg_u64 (cpu, rn, NO_SP) 12315 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1); 12316 } 12317 12318 /* 32 bit conditional invert. */ 12319 static void 12320 csinv32 (sim_cpu *cpu, CondCode cc) 12321 { 12322 unsigned rm = INSTR (20, 16); 12323 unsigned rn = INSTR (9, 5); 12324 unsigned rd = INSTR (4, 0); 12325 12326 aarch64_set_reg_u64 (cpu, rd, NO_SP, 12327 testConditionCode (cpu, cc) 12328 ? aarch64_get_reg_u32 (cpu, rn, NO_SP) 12329 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP)); 12330 } 12331 12332 /* 64 bit conditional invert. */ 12333 static void 12334 csinv64 (sim_cpu *cpu, CondCode cc) 12335 { 12336 unsigned rm = INSTR (20, 16); 12337 unsigned rn = INSTR (9, 5); 12338 unsigned rd = INSTR (4, 0); 12339 12340 aarch64_set_reg_u64 (cpu, rd, NO_SP, 12341 testConditionCode (cpu, cc) 12342 ? aarch64_get_reg_u64 (cpu, rn, NO_SP) 12343 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP)); 12344 } 12345 12346 /* 32 bit conditional negate. 
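   (The whole conditional-select family is summarised in the sketch
   below.)  */

/* Editor's sketch, not part of the simulator: CSEL, CSINC, CSINV and
   CSNEG differ only in the transform applied to Rm when the condition
   fails; when it holds, all four return Rn unchanged.  The names are
   the editor's own.  */

static uint64_t
example_csel_family (int cond_holds, uint64_t rn, uint64_t rm,
                     unsigned op)
{
  if (cond_holds)
    return rn;

  switch (op & 3)
    {
    case 0: return rm;      /* CSEL.  */
    case 1: return rm + 1;  /* CSINC.  */
    case 2: return ~rm;     /* CSINV.  */
    default: return - rm;   /* CSNEG.  */
    }
}

/* 32 bit conditional negate, continued: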
*/
12347 static void
12348 csneg32 (sim_cpu *cpu, CondCode cc)
12349 {
12350   unsigned rm = INSTR (20, 16);
12351   unsigned rn = INSTR (9, 5);
12352   unsigned rd = INSTR (4, 0);
12353
12354   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12355                        testConditionCode (cpu, cc)
12356                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12357                        : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12358 }
12359
12360 /* 64 bit conditional negate.  */
12361 static void
12362 csneg64 (sim_cpu *cpu, CondCode cc)
12363 {
12364   unsigned rm = INSTR (20, 16);
12365   unsigned rn = INSTR (9, 5);
12366   unsigned rd = INSTR (4, 0);
12367
12368   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12369                        testConditionCode (cpu, cc)
12370                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12371                        : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12372 }
12373
12374 static void
12375 dexCondSelect (sim_cpu *cpu)
12376 {
12377   /* instr[28,21] = 11010100
12378      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12379      instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12380                            100 ==> CSINV, 101 ==> CSNEG,
12381                            _1_ ==> UNALLOC
12382      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12383      instr[15,12] = cond
12384      instr[20,16] = Rm, instr[9,5] = Rn, instr[4,0] = Rd.  */
12385
12386   CondCode cc = INSTR (15, 12);
12387   uint32_t S = INSTR (29, 29);
12388   uint32_t op2 = INSTR (11, 10);
12389
12390   if (S == 1)
12391     HALT_UNALLOC;
12392
12393   if (op2 & 0x2)
12394     HALT_UNALLOC;
12395
12396   switch ((INSTR (31, 30) << 1) | op2)
12397     {
12398     case 0: csel32 (cpu, cc); return;
12399     case 1: csinc32 (cpu, cc); return;
12400     case 2: csinv32 (cpu, cc); return;
12401     case 3: csneg32 (cpu, cc); return;
12402     case 4: csel64 (cpu, cc); return;
12403     case 5: csinc64 (cpu, cc); return;
12404     case 6: csinv64 (cpu, cc); return;
12405     case 7: csneg64 (cpu, cc); return;
12406     }
12407 }
12408
12409 /* Some helpers for counting leading 1 or 0 bits.  */
12410
12411 /* Counts the number of leading bits which are the same
12412    in a 32 bit value in the range 1 to 32.  */
12413 static uint32_t
12414 leading32 (uint32_t value)
12415 {
12416   int32_t mask = 0xffff0000;
12417   uint32_t count = 16; /* Counts number of bits set in mask.  */
12418   uint32_t lo = 1;     /* Lower bound for number of sign bits.  */
12419   uint32_t hi = 32;    /* Upper bound for number of sign bits.  */
12420
12421   while (lo + 1 < hi)
12422     {
12423       int32_t test = (value & mask);
12424
12425       if (test == 0 || test == mask)
12426         {
12427           lo = count;
12428           count = (lo + hi) / 2;
12429           mask >>= (count - lo);
12430         }
12431       else
12432         {
12433           hi = count;
12434           count = (lo + hi) / 2;
12435           mask <<= hi - count;
12436         }
12437     }
12438
12439   if (lo != hi)
12440     {
12441       int32_t test;
12442
12443       mask >>= 1;
12444       test = (value & mask);
12445
12446       if (test == 0 || test == mask)
12447         count = hi;
12448       else
12449         count = lo;
12450     }
12451
12452   return count;
12453 }
12454
12455 /* Counts the number of leading bits which are the same
12456    in a 64 bit value in the range 1 to 64.  */
12457 static uint64_t
12458 leading64 (uint64_t value)
12459 {
12460   int64_t mask = 0xffffffff00000000LL;
12461   uint64_t count = 32; /* Counts number of bits set in mask.  */
12462   uint64_t lo = 1;     /* Lower bound for number of sign bits.  */
12463   uint64_t hi = 64;    /* Upper bound for number of sign bits.
*/ 12464 12465 while (lo + 1 < hi) 12466 { 12467 int64_t test = (value & mask); 12468 12469 if (test == 0 || test == mask) 12470 { 12471 lo = count; 12472 count = (lo + hi) / 2; 12473 mask >>= (count - lo); 12474 } 12475 else 12476 { 12477 hi = count; 12478 count = (lo + hi) / 2; 12479 mask <<= hi - count; 12480 } 12481 } 12482 12483 if (lo != hi) 12484 { 12485 int64_t test; 12486 12487 mask >>= 1; 12488 test = (value & mask); 12489 12490 if (test == 0 || test == mask) 12491 count = hi; 12492 else 12493 count = lo; 12494 } 12495 12496 return count; 12497 } 12498 12499 /* Bit operations. */ 12500 /* N.B register args may not be SP. */ 12501 12502 /* 32 bit count leading sign bits. */ 12503 static void 12504 cls32 (sim_cpu *cpu) 12505 { 12506 unsigned rn = INSTR (9, 5); 12507 unsigned rd = INSTR (4, 0); 12508 12509 /* N.B. the result needs to exclude the leading bit. */ 12510 aarch64_set_reg_u64 12511 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1); 12512 } 12513 12514 /* 64 bit count leading sign bits. */ 12515 static void 12516 cls64 (sim_cpu *cpu) 12517 { 12518 unsigned rn = INSTR (9, 5); 12519 unsigned rd = INSTR (4, 0); 12520 12521 /* N.B. the result needs to exclude the leading bit. */ 12522 aarch64_set_reg_u64 12523 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1); 12524 } 12525 12526 /* 32 bit count leading zero bits. */ 12527 static void 12528 clz32 (sim_cpu *cpu) 12529 { 12530 unsigned rn = INSTR (9, 5); 12531 unsigned rd = INSTR (4, 0); 12532 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); 12533 12534 /* if the sign (top) bit is set then the count is 0. */ 12535 if (pick32 (value, 31, 31)) 12536 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L); 12537 else 12538 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value)); 12539 } 12540 12541 /* 64 bit count leading zero bits. */ 12542 static void 12543 clz64 (sim_cpu *cpu) 12544 { 12545 unsigned rn = INSTR (9, 5); 12546 unsigned rd = INSTR (4, 0); 12547 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); 12548 12549 /* if the sign (top) bit is set then the count is 0. */ 12550 if (pick64 (value, 63, 63)) 12551 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L); 12552 else 12553 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value)); 12554 } 12555 12556 /* 32 bit reverse bits. */ 12557 static void 12558 rbit32 (sim_cpu *cpu) 12559 { 12560 unsigned rn = INSTR (9, 5); 12561 unsigned rd = INSTR (4, 0); 12562 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); 12563 uint32_t result = 0; 12564 int i; 12565 12566 for (i = 0; i < 32; i++) 12567 { 12568 result <<= 1; 12569 result |= (value & 1); 12570 value >>= 1; 12571 } 12572 aarch64_set_reg_u64 (cpu, rd, NO_SP, result); 12573 } 12574 12575 /* 64 bit reverse bits. */ 12576 static void 12577 rbit64 (sim_cpu *cpu) 12578 { 12579 unsigned rn = INSTR (9, 5); 12580 unsigned rd = INSTR (4, 0); 12581 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); 12582 uint64_t result = 0; 12583 int i; 12584 12585 for (i = 0; i < 64; i++) 12586 { 12587 result <<= 1; 12588 result |= (value & 1UL); 12589 value >>= 1; 12590 } 12591 aarch64_set_reg_u64 (cpu, rd, NO_SP, result); 12592 } 12593 12594 /* 32 bit reverse bytes. 
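   (A linear cross-check of the leading-bit helpers above comes
   first.)  */

/* Editor's sketch, not part of the simulator: a straightforward
   linear version of leading32, showing what the binary search above
   computes -- the number of leading bits equal to bit 31, from 1 to
   32.  CLS is this value minus one; CLZ is this value when the top
   bit is clear.  */

static uint32_t
example_leading32 (uint32_t value)
{
  uint32_t sign = value >> 31;
  uint32_t count = 1; /* Bit 31 trivially matches itself.  */
  int i;

  for (i = 30; i >= 0 && ((value >> i) & 1) == sign; i--)
    count++;

  return count;
}

/* 32 bit reverse bytes, continued: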
*/
12595 static void
12596 rev32 (sim_cpu *cpu)
12597 {
12598   unsigned rn = INSTR (9, 5);
12599   unsigned rd = INSTR (4, 0);
12600   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12601   uint32_t result = 0;
12602   int i;
12603
12604   for (i = 0; i < 4; i++)
12605     {
12606       result <<= 8;
12607       result |= (value & 0xff);
12608       value >>= 8;
12609     }
12610   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12611 }
12612
12613 /* 64 bit reverse bytes.  */
12614 static void
12615 rev64 (sim_cpu *cpu)
12616 {
12617   unsigned rn = INSTR (9, 5);
12618   unsigned rd = INSTR (4, 0);
12619   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12620   uint64_t result = 0;
12621   int i;
12622
12623   for (i = 0; i < 8; i++)
12624     {
12625       result <<= 8;
12626       result |= (value & 0xffULL);
12627       value >>= 8;
12628     }
12629   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12630 }
12631
12632 /* 32 bit reverse shorts.  */
12633 /* N.B. this reverses the order of the bytes in each half word.  */
12634 static void
12635 revh32 (sim_cpu *cpu)
12636 {
12637   unsigned rn = INSTR (9, 5);
12638   unsigned rd = INSTR (4, 0);
12639   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12640   uint32_t result = 0;
12641   int i;
12642
12643   for (i = 0; i < 2; i++)
12644     {
12645       result <<= 8;
12646       result |= (value & 0x00ff00ff);
12647       value >>= 8;
12648     }
12649   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12650 }
12651
12652 /* 64 bit reverse shorts.  */
12653 /* N.B. this reverses the order of the bytes in each half word.  */
12654 static void
12655 revh64 (sim_cpu *cpu)
12656 {
12657   unsigned rn = INSTR (9, 5);
12658   unsigned rd = INSTR (4, 0);
12659   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12660   uint64_t result = 0;
12661   int i;
12662
12663   for (i = 0; i < 2; i++)
12664     {
12665       result <<= 8;
12666       result |= (value & 0x00ff00ff00ff00ffULL);
12667       value >>= 8;
12668     }
12669   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12670 }
12671
12672 static void
12673 dexDataProc1Source (sim_cpu *cpu)
12674 {
12675   /* instr[30] = 1
12676      instr[28,21] = 11010110
12677      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12678      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12679      instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12680      instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12681                     000010 ==> REV (REV32 for 64 bit), 000011 ==> REV (64 bit only),
12682                     000100 ==> CLZ, 000101 ==> CLS
12683                     ow ==> UNALLOC
12684      instr[9,5] = rn : may not be SP
12685      instr[4,0] = rd : may not be SP.  */
12686
12687   uint32_t S = INSTR (29, 29);
12688   uint32_t opcode2 = INSTR (20, 16);
12689   uint32_t opcode = INSTR (15, 10);
12690   uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12691
12692   if (S == 1)
12693     HALT_UNALLOC;
12694
12695   if (opcode2 != 0)
12696     HALT_UNALLOC;
12697
12698   if (opcode & 0x38)
12699     HALT_UNALLOC;
12700
12701   switch (dispatch)
12702     {
12703     case 0: rbit32 (cpu); return;
12704     case 1: revh32 (cpu); return;
12705     case 2: rev32 (cpu); return;
12706     case 4: clz32 (cpu); return;
12707     case 5: cls32 (cpu); return;
12708     case 8: rbit64 (cpu); return;
12709     case 9: revh64 (cpu); return;
12710     case 10:rev32 (cpu); return;
12711     case 11:rev64 (cpu); return;
12712     case 12:clz64 (cpu); return;
12713     case 13:cls64 (cpu); return;
12714     default: HALT_UNALLOC;
12715     }
12716 }
12717
12718 /* Variable shift.
12719    Shifts by count supplied in register.
12720    N.B. register args may not be SP.
12721    These all use the shifted auxiliary function for
12722    simplicity and clarity.
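   (A minimal example of the count masking appears below.)  */

/* Editor's sketch, not part of the simulator: the one subtlety in the
   variable shifts below is that AArch64 uses the count register modulo
   the data size, hence the "& 0x1f" (32 bit) and "& 0x3f" (64 bit)
   masks on every path.  */

static uint32_t
example_lslv32 (uint32_t value, uint32_t count)
{
  return value << (count & 0x1f); /* Shift amount is count mod 32.  */
}

/*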
Writing the actual shift
12723    inline would avoid a branch and so be faster but
12724    would also necessitate getting signs right.  */
12725
12726 /* 32 bit arithmetic shift right.  */
12727 static void
12728 asrv32 (sim_cpu *cpu)
12729 {
12730   unsigned rm = INSTR (20, 16);
12731   unsigned rn = INSTR (9, 5);
12732   unsigned rd = INSTR (4, 0);
12733
12734   aarch64_set_reg_u64
12735     (cpu, rd, NO_SP,
12736      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12737                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12738 }
12739
12740 /* 64 bit arithmetic shift right.  */
12741 static void
12742 asrv64 (sim_cpu *cpu)
12743 {
12744   unsigned rm = INSTR (20, 16);
12745   unsigned rn = INSTR (9, 5);
12746   unsigned rd = INSTR (4, 0);
12747
12748   aarch64_set_reg_u64
12749     (cpu, rd, NO_SP,
12750      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12751                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12752 }
12753
12754 /* 32 bit logical shift left.  */
12755 static void
12756 lslv32 (sim_cpu *cpu)
12757 {
12758   unsigned rm = INSTR (20, 16);
12759   unsigned rn = INSTR (9, 5);
12760   unsigned rd = INSTR (4, 0);
12761
12762   aarch64_set_reg_u64
12763     (cpu, rd, NO_SP,
12764      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12765                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12766 }
12767
12768 /* 64 bit logical shift left.  */
12769 static void
12770 lslv64 (sim_cpu *cpu)
12771 {
12772   unsigned rm = INSTR (20, 16);
12773   unsigned rn = INSTR (9, 5);
12774   unsigned rd = INSTR (4, 0);
12775
12776   aarch64_set_reg_u64
12777     (cpu, rd, NO_SP,
12778      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12779                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12780 }
12781
12782 /* 32 bit logical shift right.  */
12783 static void
12784 lsrv32 (sim_cpu *cpu)
12785 {
12786   unsigned rm = INSTR (20, 16);
12787   unsigned rn = INSTR (9, 5);
12788   unsigned rd = INSTR (4, 0);
12789
12790   aarch64_set_reg_u64
12791     (cpu, rd, NO_SP,
12792      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12793                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12794 }
12795
12796 /* 64 bit logical shift right.  */
12797 static void
12798 lsrv64 (sim_cpu *cpu)
12799 {
12800   unsigned rm = INSTR (20, 16);
12801   unsigned rn = INSTR (9, 5);
12802   unsigned rd = INSTR (4, 0);
12803
12804   aarch64_set_reg_u64
12805     (cpu, rd, NO_SP,
12806      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12807                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12808 }
12809
12810 /* 32 bit rotate right.  */
12811 static void
12812 rorv32 (sim_cpu *cpu)
12813 {
12814   unsigned rm = INSTR (20, 16);
12815   unsigned rn = INSTR (9, 5);
12816   unsigned rd = INSTR (4, 0);
12817
12818   aarch64_set_reg_u64
12819     (cpu, rd, NO_SP,
12820      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12821                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12822 }
12823
12824 /* 64 bit rotate right.  */
12825 static void
12826 rorv64 (sim_cpu *cpu)
12827 {
12828   unsigned rm = INSTR (20, 16);
12829   unsigned rn = INSTR (9, 5);
12830   unsigned rd = INSTR (4, 0);
12831
12832   aarch64_set_reg_u64
12833     (cpu, rd, NO_SP,
12834      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12835                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12836 }
12837
12838
12839 /* Divide.  */
12840
12841 /* 32 bit signed divide.  */
12842 static void
12843 cpuiv32 (sim_cpu *cpu)
12844 {
12845   unsigned rm = INSTR (20, 16);
12846   unsigned rn = INSTR (9, 5);
12847   unsigned rd = INSTR (4, 0);
12848   /* N.B. the pseudo-code does the divide using 64 bit data.  */
12849   /* TODO : check that this rounds towards zero as required.
12856
12857 /* 64 bit signed divide.  */
12858 static void
12859 sdiv64 (sim_cpu *cpu)
12860 {
12861   unsigned rm = INSTR (20, 16);
12862   unsigned rn = INSTR (9, 5);
12863   unsigned rd = INSTR (4, 0);
12864
12865   /* TODO : check that this rounds towards zero as required.  */
12866   int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12867
12868   aarch64_set_reg_s64
12869     (cpu, rd, NO_SP,
12870      divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12871 }
12872
12873 /* 32 bit unsigned divide.  */
12874 static void
12875 udiv32 (sim_cpu *cpu)
12876 {
12877   unsigned rm = INSTR (20, 16);
12878   unsigned rn = INSTR (9, 5);
12879   unsigned rd = INSTR (4, 0);
12880
12881   /* N.B. the pseudo-code does the divide using 64 bit data.  */
12882   uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12883   uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12884
12885   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12886                        divisor ? (uint32_t) (dividend / divisor) : 0);
12887 }
12888
12889 /* 64 bit unsigned divide.  */
12890 static void
12891 udiv64 (sim_cpu *cpu)
12892 {
12893   unsigned rm = INSTR (20, 16);
12894   unsigned rn = INSTR (9, 5);
12895   unsigned rd = INSTR (4, 0);
12896
12897   /* TODO : check that this rounds towards zero as required.  */
12898   uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12899
12900   aarch64_set_reg_u64
12901     (cpu, rd, NO_SP,
12902      divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12903 }
12904
12905 static void
12906 dexDataProc2Source (sim_cpu *cpu)
12907 {
12908   /* assert instr[30] == 0
12909      instr[28,21] == 11010110
12910      instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
12911      instr[29]    = S : 0 ==> ok, 1 ==> UNALLOC
12912      instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12913                              001000 ==> LSLV, 001001 ==> LSRV
12914                              001010 ==> ASRV, 001011 ==> RORV
12915                              ow ==> UNALLOC.  */
12916
12917   uint32_t dispatch;
12918   uint32_t S = INSTR (29, 29);
12919   uint32_t opcode = INSTR (15, 10);
12920
12921   if (S == 1)
12922     HALT_UNALLOC;
12923
12924   if (opcode & 0x34)
12925     HALT_UNALLOC;
12926
12927   dispatch = (  (INSTR (31, 31) << 3)
12928               | (uimm (opcode, 3, 3) << 2)
12929               | uimm (opcode, 1, 0));
12930   switch (dispatch)
12931     {
12932     case 2:  udiv32 (cpu); return;
12933     case 3:  sdiv32 (cpu); return;
12934     case 4:  lslv32 (cpu); return;
12935     case 5:  lsrv32 (cpu); return;
12936     case 6:  asrv32 (cpu); return;
12937     case 7:  rorv32 (cpu); return;
12938     case 10: udiv64 (cpu); return;
12939     case 11: sdiv64 (cpu); return;
12940     case 12: lslv64 (cpu); return;
12941     case 13: lsrv64 (cpu); return;
12942     case 14: asrv64 (cpu); return;
12943     case 15: rorv64 (cpu); return;
12944     default: HALT_UNALLOC;
12945     }
12946 }
12947
12948
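/* Editor's note: the dispatch value above packs the size bit together
   with opcode bits [3] and [1,0]; opcode bit [2] is always zero once
   the 0x34 mask check has passed.  A small sanity sketch of that
   packing, under the hypothetical SIM_EXAMPLES guard:  */

#ifdef SIM_EXAMPLES
#include <assert.h>
static void
dp2_dispatch_check (void)
{
  /* Pack (size, opcode) the same way dexDataProc2Source does.  */
#define DP2_PACK(size, opcode) \
  (((size) << 3) | ((((opcode) >> 3) & 1) << 2) | ((opcode) & 3))
  assert (DP2_PACK (0, 0x02) == 2);   /* 32 bit UDIV.  */
  assert (DP2_PACK (0, 0x03) == 3);   /* 32 bit SDIV.  */
  assert (DP2_PACK (0, 0x08) == 4);   /* 32 bit LSLV.  */
  assert (DP2_PACK (1, 0x0B) == 15);  /* 64 bit RORV.  */
#undef DP2_PACK
}
#endif /* SIM_EXAMPLES */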
12949 /* Multiply.  */
12950
12951 /* 32 bit multiply and add.  */
12952 static void
12953 madd32 (sim_cpu *cpu)
12954 {
12955   unsigned rm = INSTR (20, 16);
12956   unsigned ra = INSTR (14, 10);
12957   unsigned rn = INSTR (9, 5);
12958   unsigned rd = INSTR (4, 0);
12959
12960   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12961   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12962                        aarch64_get_reg_u32 (cpu, ra, NO_SP)
12963                        + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12964                        * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12965 }
12966
12967 /* 64 bit multiply and add.  */
12968 static void
12969 madd64 (sim_cpu *cpu)
12970 {
12971   unsigned rm = INSTR (20, 16);
12972   unsigned ra = INSTR (14, 10);
12973   unsigned rn = INSTR (9, 5);
12974   unsigned rd = INSTR (4, 0);
12975
12976   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12977   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12978                        aarch64_get_reg_u64 (cpu, ra, NO_SP)
12979                        + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12980                           * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12981 }
12982
12983 /* 32 bit multiply and sub.  */
12984 static void
12985 msub32 (sim_cpu *cpu)
12986 {
12987   unsigned rm = INSTR (20, 16);
12988   unsigned ra = INSTR (14, 10);
12989   unsigned rn = INSTR (9, 5);
12990   unsigned rd = INSTR (4, 0);
12991
12992   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12993   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12994                        aarch64_get_reg_u32 (cpu, ra, NO_SP)
12995                        - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12996                        * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12997 }
12998
12999 /* 64 bit multiply and sub.  */
13000 static void
13001 msub64 (sim_cpu *cpu)
13002 {
13003   unsigned rm = INSTR (20, 16);
13004   unsigned ra = INSTR (14, 10);
13005   unsigned rn = INSTR (9, 5);
13006   unsigned rd = INSTR (4, 0);
13007
13008   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13009   aarch64_set_reg_u64 (cpu, rd, NO_SP,
13010                        aarch64_get_reg_u64 (cpu, ra, NO_SP)
13011                        - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13012                        * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13013 }
13014
13015 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit.  */
13016 static void
13017 smaddl (sim_cpu *cpu)
13018 {
13019   unsigned rm = INSTR (20, 16);
13020   unsigned ra = INSTR (14, 10);
13021   unsigned rn = INSTR (9, 5);
13022   unsigned rd = INSTR (4, 0);
13023
13024   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13025      obtain a 64 bit product.  */
13026   aarch64_set_reg_s64
13027     (cpu, rd, NO_SP,
13028      aarch64_get_reg_s64 (cpu, ra, NO_SP)
13029      + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13030      * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13031 }
13032
13033 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
13034 static void
13035 smsubl (sim_cpu *cpu)
13036 {
13037   unsigned rm = INSTR (20, 16);
13038   unsigned ra = INSTR (14, 10);
13039   unsigned rn = INSTR (9, 5);
13040   unsigned rd = INSTR (4, 0);
13041
13042   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13043      obtain a 64 bit product.  */
13044   aarch64_set_reg_s64
13045     (cpu, rd, NO_SP,
13046      aarch64_get_reg_s64 (cpu, ra, NO_SP)
13047      - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13048      * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13049 }
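/* Editor's note: the casts in smaddl/smsubl above matter -- the
   operands must be widened to 64 bits before the multiply, not after,
   or the high half of the product is lost.  An illustrative check,
   under the hypothetical SIM_EXAMPLES guard:  */

#ifdef SIM_EXAMPLES
#include <assert.h>
static void
widening_multiply_check (void)
{
  int32_t a = 0x40000000;          /* 2^30.  */
  int32_t b = 4;
  /* Widen first: the full 2^32 product is preserved.  */
  int64_t wide = (int64_t) a * (int64_t) b;
  assert (wide == 0x100000000LL);
  /* Multiplying in 32 bits first would overflow before widening (and
     signed overflow is undefined behaviour in C to boot).  */
}
#endif /* SIM_EXAMPLES */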
13050
13051 /* Integer Multiply/Divide.  */
13052
13053 /* First some macros and a helper function.  */
13054 /* Macros to test or access elements of 64 bit words.  */
13055
13056 /* Mask used to access lo 32 bits of 64 bit unsigned int.  */
13057 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13058 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
13059 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13060 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
13061 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13062
13063 /* Offset of sign bit in 64 bit signed integer.  */
13064 #define SIGN_SHIFT_U64 63
13065 /* The sign bit itself -- also identifies the minimum negative int value.  */
13066 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
13067 /* Return true if a 64 bit signed int presented as an unsigned int is the
13068    most negative value.  */
13069 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13070 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13071    int has its sign bit set.  */
13072 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13073 /* Return 1L or -1L according to whether a 64 bit signed int presented as
13074    an unsigned int has its sign bit set or not.  */
13075 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13076 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int.  */
13077 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13078
13079 /* Multiply two 64 bit ints and return
13080    the hi 64 bits of the 128 bit product.  */
13081
13082 static uint64_t
13083 mul64hi (uint64_t value1, uint64_t value2)
13084 {
13085   uint64_t resultmid1;
13086   uint64_t result;
13087   uint64_t value1_lo = lowWordToU64 (value1);
13088   uint64_t value1_hi = highWordToU64 (value1);
13089   uint64_t value2_lo = lowWordToU64 (value2);
13090   uint64_t value2_hi = highWordToU64 (value2);
13091
13092   /* Cross-multiply and collect results.  */
13093   uint64_t xproductlo = value1_lo * value2_lo;
13094   uint64_t xproductmid1 = value1_lo * value2_hi;
13095   uint64_t xproductmid2 = value1_hi * value2_lo;
13096   uint64_t xproducthi = value1_hi * value2_hi;
13097   uint64_t carry = 0;
13098   /* Start accumulating 64 bit results.  */
13099   /* Drop bottom half of lowest cross-product.  */
13100   uint64_t resultmid = xproductlo >> 32;
13101   /* Add in middle products.  */
13102   resultmid = resultmid + xproductmid1;
13103
13104   /* Check for overflow.  */
13105   if (resultmid < xproductmid1)
13106     /* Carry over 1 into top cross-product.  */
13107     carry++;
13108
13109   resultmid1 = resultmid + xproductmid2;
13110
13111   /* Check for overflow.  */
13112   if (resultmid1 < xproductmid2)
13113     /* Carry over 1 into top cross-product.  */
13114     carry++;
13115
13116   /* Drop lowest 32 bits of middle cross-product.  */
13117   result = resultmid1 >> 32;
13118   /* Move carry bit to just above middle cross-product highest bit.  */
13119   carry = carry << 32;
13120
13121   /* Add the top cross-product and any carry.  */
13122   result += xproducthi + carry;
13123
13124   return result;
13125 }
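/* Editor's note: on compilers that provide the (non-standard) unsigned
   __int128 type, mul64hi can be cross-checked directly against a full
   128 bit multiply.  A sketch of such a check, under the hypothetical
   SIM_EXAMPLES guard -- __int128 availability is an assumption here:  */

#ifdef SIM_EXAMPLES
#include <assert.h>
static void
mul64hi_check (uint64_t a, uint64_t b)
{
  /* Compute the full 128 bit product and compare its high half
     against mul64hi's cross-multiplication result.  */
  unsigned __int128 wide = (unsigned __int128) a * b;
  assert (mul64hi (a, b) == (uint64_t) (wide >> 64));
}
#endif /* SIM_EXAMPLES */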
13126
13127 /* Signed multiply high, source, source2 :
13128    64 bit, dest <-- high 64-bit of result.  */
13129 static void
13130 smulh (sim_cpu *cpu)
13131 {
13132   uint64_t uresult;
13133   int64_t result;
13134   unsigned rm = INSTR (20, 16);
13135   unsigned rn = INSTR (9, 5);
13136   unsigned rd = INSTR (4, 0);
13137   GReg ra = INSTR (14, 10);
13138   int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13139   int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13140   uint64_t uvalue1;
13141   uint64_t uvalue2;
13142   int negate = 0;
13143
13144   if (ra != R31)
13145     HALT_UNALLOC;
13146
13147   /* Convert to unsigned and use the unsigned mul64hi routine,
13148      then fix the sign up afterwards.  */
13149   if (value1 < 0)
13150     {
13151       negate = !negate;
13152       /* Negate via the unsigned type to avoid signed overflow on
13153          INT64_MIN.  */
13154       uvalue1 = - (uint64_t) value1;
13155     }
13156   else
13157     {
13158       uvalue1 = value1;
13159     }
13160
13161   if (value2 < 0)
13162     {
13163       negate = !negate;
13164       uvalue2 = - (uint64_t) value2;
13165     }
13166   else
13167     {
13168       uvalue2 = value2;
13169     }
13170
13171   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13172
13173   uresult = mul64hi (uvalue1, uvalue2);
13174   result = uresult;
13175
13176   if (negate)
13177     {
13178       /* Multiply 128-bit result by -1, which means highpart gets inverted,
13179          and has carry in added only if low part is 0.  */
13180       result = ~result;
13181       if ((uvalue1 * uvalue2) == 0)
13182         result += 1;
13183     }
13184
13185   aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13186 }
13187
13188 /* Unsigned multiply add long -- source, source2 :
13189    32 bit, source3 : 64 bit.  */
13190 static void
13191 umaddl (sim_cpu *cpu)
13192 {
13193   unsigned rm = INSTR (20, 16);
13194   unsigned ra = INSTR (14, 10);
13195   unsigned rn = INSTR (9, 5);
13196   unsigned rd = INSTR (4, 0);
13197
13198   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13199   /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13200      obtain a 64 bit product.  */
13201   aarch64_set_reg_u64
13202     (cpu, rd, NO_SP,
13203      aarch64_get_reg_u64 (cpu, ra, NO_SP)
13204      + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13205      * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13206 }
13207
13208 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
13209 static void
13210 umsubl (sim_cpu *cpu)
13211 {
13212   unsigned rm = INSTR (20, 16);
13213   unsigned ra = INSTR (14, 10);
13214   unsigned rn = INSTR (9, 5);
13215   unsigned rd = INSTR (4, 0);
13216
13217   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13218   /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13219      obtain a 64 bit product.  */
13220   aarch64_set_reg_u64
13221     (cpu, rd, NO_SP,
13222      aarch64_get_reg_u64 (cpu, ra, NO_SP)
13223      - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13224      * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13225 }
13226
13227 /* Unsigned multiply high, source, source2 :
13228    64 bit, dest <-- high 64-bit of result.  */
13229 static void
13230 umulh (sim_cpu *cpu)
13231 {
13232   unsigned rm = INSTR (20, 16);
13233   unsigned rn = INSTR (9, 5);
13234   unsigned rd = INSTR (4, 0);
13235   GReg ra = INSTR (14, 10);
13236
13237   if (ra != R31)
13238     HALT_UNALLOC;
13239
13240   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13241   aarch64_set_reg_u64 (cpu, rd, NO_SP,
13242                        mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13243                                 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13244 }
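/* Editor's note: the sign fix-up in smulh relies on the two's complement
   identity -x == ~x + 1 applied across a 128 bit (hi,lo) pair: the low
   half becomes ~lo + 1, which carries into the high half only when
   lo == 0.  A sketch using the non-standard unsigned __int128 type,
   under the hypothetical SIM_EXAMPLES guard:  */

#ifdef SIM_EXAMPLES
#include <assert.h>
static void
neg128_identity_check (uint64_t hi, uint64_t lo)
{
  unsigned __int128 x = ((unsigned __int128) hi << 64) | lo;
  unsigned __int128 n = - x;
  /* High half is inverted, with a carry in only when the low half is 0.  */
  assert ((uint64_t) (n >> 64) == ~hi + (lo == 0 ? 1 : 0));
}
#endif /* SIM_EXAMPLES */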
13245
13246 static void
13247 dexDataProc3Source (sim_cpu *cpu)
13248 {
13249   /* assert instr[28,24] == 11011.  */
13250   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13251      instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13252      instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13253      instr[15]    = o0 : 0/1 ==> ok
13254      instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB,    (32/64 bit)
13255                              0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13256                              0100 ==> SMULH,                   (64 bit only)
13257                              1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13258                              1100 ==> UMULH                    (64 bit only)
13259                              ow ==> UNALLOC.  */
13260
13261   uint32_t dispatch;
13262   uint32_t size = INSTR (31, 31);
13263   uint32_t op54 = INSTR (30, 29);
13264   uint32_t op31 = INSTR (23, 21);
13265   uint32_t o0 = INSTR (15, 15);
13266
13267   if (op54 != 0)
13268     HALT_UNALLOC;
13269
13270   if (size == 0)
13271     {
13272       if (op31 != 0)
13273         HALT_UNALLOC;
13274
13275       if (o0 == 0)
13276         madd32 (cpu);
13277       else
13278         msub32 (cpu);
13279       return;
13280     }
13281
13282   dispatch = (op31 << 1) | o0;
13283
13284   switch (dispatch)
13285     {
13286     case 0:  madd64 (cpu); return;
13287     case 1:  msub64 (cpu); return;
13288     case 2:  smaddl (cpu); return;
13289     case 3:  smsubl (cpu); return;
13290     case 4:  smulh (cpu); return;
13291     case 10: umaddl (cpu); return;
13292     case 11: umsubl (cpu); return;
13293     case 12: umulh (cpu); return;
13294     default: HALT_UNALLOC;
13295     }
13296 }
13297
13298 static void
13299 dexDPReg (sim_cpu *cpu)
13300 {
13301   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13302      assert  group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13303      bits [28:24:21] of a DPReg are the secondary dispatch vector.  */
13304   uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13305
13306   switch (group2)
13307     {
13308     case DPREG_LOG_000:
13309     case DPREG_LOG_001:
13310       dexLogicalShiftedRegister (cpu); return;
13311
13312     case DPREG_ADDSHF_010:
13313       dexAddSubtractShiftedRegister (cpu); return;
13314
13315     case DPREG_ADDEXT_011:
13316       dexAddSubtractExtendedRegister (cpu); return;
13317
13318     case DPREG_ADDCOND_100:
13319       {
13320         /* This set bundles a variety of different operations.  */
13321         /* Check for :  */
13322         /* 1) add/sub with carry.  */
13323         uint32_t mask1 = 0x1FE00000U;
13324         uint32_t val1  = 0x1A000000U;
13325         /* 2) cond compare register/immediate.  */
13326         uint32_t mask2 = 0x1FE00000U;
13327         uint32_t val2  = 0x1A400000U;
13328         /* 3) cond select.  */
13329         uint32_t mask3 = 0x1FE00000U;
13330         uint32_t val3  = 0x1A800000U;
13331         /* 4) data proc 1/2 source.  */
13332         uint32_t mask4 = 0x1FE00000U;
13333         uint32_t val4  = 0x1AC00000U;
13334
13335         if ((aarch64_get_instr (cpu) & mask1) == val1)
13336           dexAddSubtractWithCarry (cpu);
13337
13338         else if ((aarch64_get_instr (cpu) & mask2) == val2)
13339           CondCompare (cpu);
13340
13341         else if ((aarch64_get_instr (cpu) & mask3) == val3)
13342           dexCondSelect (cpu);
13343
13344         else if ((aarch64_get_instr (cpu) & mask4) == val4)
13345           {
13346             /* Bit 30 is clear for data proc 2 source
13347                and set for data proc 1 source.  */
13348             if (aarch64_get_instr (cpu) & (1U << 30))
13349               dexDataProc1Source (cpu);
13350             else
13351               dexDataProc2Source (cpu);
13352           }
13353
13354         else
13355           /* Should not reach here.  */
13356           HALT_NYI;
13357
13358         return;
13359       }
13360
13361     case DPREG_3SRC_110:
13362       dexDataProc3Source (cpu); return;
13363
13364     case DPREG_UNALLOC_101:
13365       HALT_UNALLOC;
13366
13367     case DPREG_3SRC_111:
13368       dexDataProc3Source (cpu); return;
13369
13370     default:
13371       /* Should never reach here.  */
13372       HALT_NYI;
13373     }
13374 }
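/* Editor's note: the four mask/val pairs in DPREG_ADDCOND_100 above all
   use the same mask (0x1FE00000) and their values differ only in instr
   bits [23,22], so the chain of tests is really a 2 bit sub-dispatch.
   Illustrated under the hypothetical SIM_EXAMPLES guard:  */

#ifdef SIM_EXAMPLES
#include <assert.h>
static void
addcond_subdispatch_check (void)
{
  /* Bits [23,22] of each val enumerate the four sub-cases 0..3.  */
  assert (((0x1A000000U >> 22) & 3) == 0);  /* add/sub with carry.  */
  assert (((0x1A400000U >> 22) & 3) == 1);  /* cond compare.  */
  assert (((0x1A800000U >> 22) & 3) == 2);  /* cond select.  */
  assert (((0x1AC00000U >> 22) & 3) == 3);  /* data proc 1/2 source.  */
}
#endif /* SIM_EXAMPLES */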
13375
13376 /* Unconditional Branch immediate.
13377    Offset is a PC-relative byte offset in the range +/- 128MiB.
13377    The offset in the instruction is a word offset; the decode routines
13378    scale it to a byte offset before calling the handlers here.  */
13379
13380 /* Unconditional branch.  */
13381 static void
13382 buc (sim_cpu *cpu, int32_t offset)
13383 {
13384   aarch64_set_next_PC_by_offset (cpu, offset);
13385 }
13386
13387 static unsigned stack_depth = 0;
13388
13389 /* Unconditional branch and link -- writes return PC to LR.  */
13390 static void
13391 bl (sim_cpu *cpu, int32_t offset)
13392 {
13393   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13394   aarch64_save_LR (cpu);
13395   aarch64_set_next_PC_by_offset (cpu, offset);
13396
13397   if (TRACE_BRANCH_P (cpu))
13398     {
13399       ++ stack_depth;
13400       TRACE_BRANCH (cpu,
13401                     " %*scall %" PRIx64 " [%s]"
13402                     " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13403                     stack_depth, " ", aarch64_get_next_PC (cpu),
13404                     aarch64_get_func (CPU_STATE (cpu),
13405                                       aarch64_get_next_PC (cpu)),
13406                     aarch64_get_reg_u64 (cpu, 0, NO_SP),
13407                     aarch64_get_reg_u64 (cpu, 1, NO_SP),
13408                     aarch64_get_reg_u64 (cpu, 2, NO_SP)
13409                     );
13410     }
13411 }
13412
13413 /* Unconditional Branch register.
13414    Branch/return address is in source register.  */
13415
13416 /* Unconditional branch.  */
13417 static void
13418 br (sim_cpu *cpu)
13419 {
13420   unsigned rn = INSTR (9, 5);
13421   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13422   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13423 }
13424
13425 /* Unconditional branch and link -- writes return PC to LR.  */
13426 static void
13427 blr (sim_cpu *cpu)
13428 {
13429   /* Ensure we read the destination before we write LR.  */
13430   uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
13431
13432   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13433   aarch64_save_LR (cpu);
13434   aarch64_set_next_PC (cpu, target);
13435
13436   if (TRACE_BRANCH_P (cpu))
13437     {
13438       ++ stack_depth;
13439       TRACE_BRANCH (cpu,
13440                     " %*scall %" PRIx64 " [%s]"
13441                     " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13442                     stack_depth, " ", aarch64_get_next_PC (cpu),
13443                     aarch64_get_func (CPU_STATE (cpu),
13444                                       aarch64_get_next_PC (cpu)),
13445                     aarch64_get_reg_u64 (cpu, 0, NO_SP),
13446                     aarch64_get_reg_u64 (cpu, 1, NO_SP),
13447                     aarch64_get_reg_u64 (cpu, 2, NO_SP)
13448                     );
13449     }
13450 }
13451
13452 /* Return -- the assembler will default the source to LR.  This is
13453    functionally equivalent to br but, presumably, unlike br it
13454    side-effects the branch predictor.  */
13455 static void
13456 ret (sim_cpu *cpu)
13457 {
13458   unsigned rn = INSTR (9, 5);
13459   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13460
13461   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13462   if (TRACE_BRANCH_P (cpu))
13463     {
13464       TRACE_BRANCH (cpu,
13465                     " %*sreturn [result: %" PRIx64 "]",
13466                     stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13467       -- stack_depth;
13468     }
13469 }
13470
13471 /* NOP -- we implement this and call it from the decode in case we
13472    want to intercept it later.  */
13473
13474 static void
13475 nop (sim_cpu *cpu)
13476 {
13477   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13478 }
13479
13480 /* Data synchronization barrier.  */
13481
13482 static void
13483 dsb (sim_cpu *cpu)
13484 {
13485   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13486 }
13487
13488 /* Data memory barrier.  */
13489
13490 static void
13491 dmb (sim_cpu *cpu)
13492 {
13493   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13494 }
13495
13496 /* Instruction synchronization barrier.  */
13497
13498 static void
13499 isb (sim_cpu *cpu)
13500 {
13501   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13502 }
13503
13504 static void
13505 dexBranchImmediate (sim_cpu *cpu)
13506 {
13507   /* assert instr[30,26] == 00101
13508      instr[31] ==> 0 == B, 1 == BL
13509      instr[25,0] == imm26 branch offset counted in words.  */
13510
13511   uint32_t top = INSTR (31, 31);
13512   /* We have a 26 bit signed word offset which we need to pass to the
13513      execute routine as a signed byte offset.  */
13514   int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13515
13516   if (top)
13517     bl (cpu, offset);
13518   else
13519     buc (cpu, offset);
13520 }
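/* Editor's note: simm32 is defined elsewhere in the simulator and is
   assumed here to extract bits [hi,lo] and sign-extend the result; the
   << 2 above then rescales the word offset to a byte offset.  A worked
   example of that arithmetic, under the hypothetical SIM_EXAMPLES
   guard:  */

#ifdef SIM_EXAMPLES
#include <assert.h>
static void
branch_offset_check (void)
{
  /* An imm26 field of all ones is the word offset -1, i.e. a byte
     offset of -4: a branch back to the previous instruction.  */
  int32_t imm26 = 0x3FFFFFF;
  int32_t word_offset = (imm26 & 0x2000000) ? imm26 - 0x4000000 : imm26;
  assert (word_offset == -1);
  assert (word_offset * 4 == -4);
}
#endif /* SIM_EXAMPLES */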
13521
13522 /* Control Flow.  */
13523
13524 /* Conditional branch
13525
13526    Offset is a PC-relative byte offset in the range +/- 1MiB.  Pos is
13527    a bit position in the range 0 .. 63.
13528
13529    cc is a CondCode enum value as pulled out of the decode.
13530
13531    N.B. any offset register (source) can only be Xn or Wn.  */
13532
13533 static void
13534 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13535 {
13536   /* The test returns TRUE if CC is met.  */
13537   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13538   if (testConditionCode (cpu, cc))
13539     aarch64_set_next_PC_by_offset (cpu, offset);
13540 }
13541
13542 /* 32 bit branch on register non-zero.  */
13543 static void
13544 cbnz32 (sim_cpu *cpu, int32_t offset)
13545 {
13546   unsigned rt = INSTR (4, 0);
13547
13548   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13549   if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13550     aarch64_set_next_PC_by_offset (cpu, offset);
13551 }
13552
13553 /* 64 bit branch on register non-zero.  */
13554 static void
13555 cbnz (sim_cpu *cpu, int32_t offset)
13556 {
13557   unsigned rt = INSTR (4, 0);
13558
13559   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13560   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13561     aarch64_set_next_PC_by_offset (cpu, offset);
13562 }
13563
13564 /* 32 bit branch on register zero.  */
13565 static void
13566 cbz32 (sim_cpu *cpu, int32_t offset)
13567 {
13568   unsigned rt = INSTR (4, 0);
13569
13570   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13571   if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13572     aarch64_set_next_PC_by_offset (cpu, offset);
13573 }
13574
13575 /* 64 bit branch on register zero.  */
13576 static void
13577 cbz (sim_cpu *cpu, int32_t offset)
13578 {
13579   unsigned rt = INSTR (4, 0);
13580
13581   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13582   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13583     aarch64_set_next_PC_by_offset (cpu, offset);
13584 }
13585
13586 /* Branch on register bit test non-zero -- one size fits all.  */
13587 static void
13588 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13589 {
13590   unsigned rt = INSTR (4, 0);
13591
13592   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13593   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13594     aarch64_set_next_PC_by_offset (cpu, offset);
13595 }
13596
13597 /* Branch on register bit test zero -- one size fits all.  */
13598 static void
13599 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13600 {
13601   unsigned rt = INSTR (4, 0);
13602
13603   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13604   if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13605     aarch64_set_next_PC_by_offset (cpu, offset);
13606 }
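/* Editor's note: for TBZ/TBNZ the tested bit index is split across the
   encoding as b5 (instr[31]) and b40 (instr[23,19]); dexTestBranchImmediate
   below reassembles it as (b5 << 5) | b40.  A worked example, under the
   hypothetical SIM_EXAMPLES guard:  */

#ifdef SIM_EXAMPLES
#include <assert.h>
static void
tb_pos_check (void)
{
  /* Testing bit 37: 37 == 0b100101, so b5 == 1 and b40 == 0b00101.  */
  uint32_t b5 = 1;
  uint32_t b40 = 0x05;
  assert (((b5 << 5) | b40) == 37);
}
#endif /* SIM_EXAMPLES */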
13607
13608 static void
13609 dexCompareBranchImmediate (sim_cpu *cpu)
13610 {
13611   /* instr[30,25] = 01 1010
13612      instr[31]   = size : 0 ==> 32, 1 ==> 64
13613      instr[24]   = op : 0 ==> CBZ, 1 ==> CBNZ
13614      instr[23,5] = simm19 branch offset counted in words
13615      instr[4,0]  = rt  */
13616
13617   uint32_t size = INSTR (31, 31);
13618   uint32_t op   = INSTR (24, 24);
13619   int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13620
13621   if (size == 0)
13622     {
13623       if (op == 0)
13624         cbz32 (cpu, offset);
13625       else
13626         cbnz32 (cpu, offset);
13627     }
13628   else
13629     {
13630       if (op == 0)
13631         cbz (cpu, offset);
13632       else
13633         cbnz (cpu, offset);
13634     }
13635 }
13636
13637 static void
13638 dexTestBranchImmediate (sim_cpu *cpu)
13639 {
13640   /* instr[31]    = b5 : bit 5 of test bit idx
13641      instr[30,25] = 01 1011
13642      instr[24]    = op : 0 ==> TBZ, 1 ==> TBNZ
13643      instr[23,19] = b40 : bits 4 to 0 of test bit idx
13644      instr[18,5]  = simm14 : signed offset counted in words
13645      instr[4,0]   = uimm5  */
13646
13647   uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13648   int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13649
13650   NYI_assert (30, 25, 0x1b);
13651
13652   if (INSTR (24, 24) == 0)
13653     tbz (cpu, pos, offset);
13654   else
13655     tbnz (cpu, pos, offset);
13656 }
13657
13658 static void
13659 dexCondBranchImmediate (sim_cpu *cpu)
13660 {
13661   /* instr[31,25] = 010 1010
13662      instr[24]    = op1 : must be 0 for B.cond
13663      instr[23,5]  = simm19 : signed offset counted in words
13664      instr[4]     = op0 : must be 0
13665      instr[3,0]   = cond  */
13666
13667   int32_t offset;
13668   uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13669
13670   NYI_assert (31, 25, 0x2a);
13671
13672   if (op != 0)
13673     HALT_UNALLOC;
13674
13675   offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13676
13677   bcc (cpu, offset, INSTR (3, 0));
13678 }
13679
13680 static void
13681 dexBranchRegister (sim_cpu *cpu)
13682 {
13683   /* instr[31,25] = 110 1011
13684      instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET,
13684                          4 ==> ERET, 5 ==> DRPS
13685      instr[20,16] = op2 : must be 11111
13686      instr[15,10] = op3 : must be 000000
13687      instr[4,0]   = op4 : must be 00000.  */
13688
13689   uint32_t op = INSTR (24, 21);
13690   uint32_t op2 = INSTR (20, 16);
13691   uint32_t op3 = INSTR (15, 10);
13692   uint32_t op4 = INSTR (4, 0);
13693
13694   NYI_assert (31, 25, 0x6b);
13695
13696   if (op2 != 0x1F || op3 != 0 || op4 != 0)
13697     HALT_UNALLOC;
13698
13699   if (op == 0)
13700     br (cpu);
13701
13702   else if (op == 1)
13703     blr (cpu);
13704
13705   else if (op == 2)
13706     ret (cpu);
13707
13708   else
13709     {
13710       /* ERET and DRPS accept 0b11111 for rn = instr [9,5].
13711          Anything else is unallocated.  */
13712       uint32_t rn = INSTR (9, 5);
13713
13714       if (rn != 0x1f)
13715         HALT_UNALLOC;
13716
13717       if (op == 4 || op == 5)
13718         HALT_NYI;
13719
13720       HALT_UNALLOC;
13721     }
13722 }
13723
13724 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13725    but this may not be available.  So instead we define the values we need
13726    here.  */
13727 #define AngelSVC_Reason_Open            0x01
13728 #define AngelSVC_Reason_Close           0x02
13729 #define AngelSVC_Reason_Write           0x05
13730 #define AngelSVC_Reason_Read            0x06
13731 #define AngelSVC_Reason_IsTTY           0x09
13732 #define AngelSVC_Reason_Seek            0x0A
13733 #define AngelSVC_Reason_FLen            0x0C
13734 #define AngelSVC_Reason_Remove          0x0E
13735 #define AngelSVC_Reason_Rename          0x0F
13736 #define AngelSVC_Reason_Clock           0x10
13737 #define AngelSVC_Reason_Time            0x11
13738 #define AngelSVC_Reason_System          0x12
13739 #define AngelSVC_Reason_Errno           0x13
13740 #define AngelSVC_Reason_GetCmdLine      0x15
13741 #define AngelSVC_Reason_HeapInfo        0x16
13742 #define AngelSVC_Reason_ReportException 0x18
13743 #define AngelSVC_Reason_Elapsed         0x30
13744
13745
13746 static void
13747 handle_halt (sim_cpu *cpu, uint32_t val)
13748 {
13749   uint64_t result = 0;
13750
13751   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13752   if (val != 0xf000)
13753     {
13754       TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13755       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13756                        sim_stopped, SIM_SIGTRAP);
13757     }
13758
13759   /* We have encountered an Angel SVC call.  See if we can process it.  */
13760   switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13761     {
13762     case AngelSVC_Reason_HeapInfo:
13763       {
13764         /* Get the values.  */
13765         uint64_t stack_top = aarch64_get_stack_start (cpu);
13766         uint64_t heap_base = aarch64_get_heap_start (cpu);
13767
13768         /* Get the pointer.  */
13769         uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13770         ptr = aarch64_get_mem_u64 (cpu, ptr);
13771
13772         /* Fill in the memory block.  */
13773         /* Start addr of heap.  */
13774         aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13775         /* End addr of heap.  */
13776         aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13777         /* Lowest stack addr.  */
13778         aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13779         /* Initial stack addr.  */
13780         aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13781
13782         TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13783       }
13784       break;
13785
13786     case AngelSVC_Reason_Open:
13787       {
13788         /* Get the pointer.  */
13789         /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);  */
13790         /* FIXME: For now we just assume that we will only be asked
13791            to open the standard file descriptors.  */
13792         static int fd = 0;
13793         result = fd ++;
13794
13795         TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13796       }
13797       break;
13798
13799     case AngelSVC_Reason_Close:
13800       {
13801         uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13802         TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13803         result = 0;
13804       }
13805       break;
13806
13807     case AngelSVC_Reason_Errno:
13808       result = 0;
13809       TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13810       break;
13811
13812     case AngelSVC_Reason_Clock:
13813       result =
13814 #ifdef CLOCKS_PER_SEC
13815         (CLOCKS_PER_SEC >= 100)
13816         ? (clock () / (CLOCKS_PER_SEC / 100))
13817         : ((clock () * 100) / CLOCKS_PER_SEC)
13818 #else
13819         /* Presume unix... clock() returns microseconds.  */
13820         (clock () / 10000)
13821 #endif
13822         ;
13823       TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13824       break;
13825
13826     case AngelSVC_Reason_GetCmdLine:
13827       {
13828         /* Get the pointer.  */
13829         uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13830         ptr = aarch64_get_mem_u64 (cpu, ptr);
13831
13832         /* FIXME: No command line for now.  */
13833         aarch64_set_mem_u64 (cpu, ptr, 0);
13834         TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13835       }
13836       break;
13837
13838     case AngelSVC_Reason_IsTTY:
13839       result = 1;
13840       TRACE_SYSCALL (cpu, " AngelSVC: IsTTY?");
13841       break;
13842
13843     case AngelSVC_Reason_Write:
13844       {
13845         /* Get the pointer.  */
13846         uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13847         /* Get the write control block.  */
13848         uint64_t fd  = aarch64_get_mem_u64 (cpu, ptr);
13849         uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13850         uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13851
13852         TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13853                        PRIx64 " on descriptor %" PRIx64,
13854                        len, buf, fd);
13855
13856         if (len > 1280)
13857           {
13858             TRACE_SYSCALL (cpu,
13859                            " AngelSVC: Write: Suspiciously long write: %ld",
13860                            (long) len);
13861             sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13862                              sim_stopped, SIM_SIGBUS);
13863           }
13864         else if (fd == 1)
13865           {
13866             printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13867           }
13868         else if (fd == 2)
13869           {
13870             TRACE (cpu, 0, "\n");
13871             sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13872                             (int) len, aarch64_get_mem_ptr (cpu, buf));
13873             TRACE (cpu, 0, "\n");
13874           }
13875         else
13876           {
13877             TRACE_SYSCALL (cpu,
13878                            " AngelSVC: Write: Unexpected file handle: %d",
13879                            (int) fd);
13880             sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13881                              sim_stopped, SIM_SIGABRT);
13882           }
13883       }
13884       break;
13885
13886     case AngelSVC_Reason_ReportException:
13887       {
13888         /* Get the pointer.  */
13889         uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13890         /* ptr = aarch64_get_mem_u64 (cpu, ptr);  */
13891         uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13892         uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13893
13894         TRACE_SYSCALL (cpu,
13895                        "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13896                        type, state);
13897
13898         if (type == 0x20026)
13899           sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13900                            sim_exited, state);
13901         else
13902           sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13903                            sim_stopped, SIM_SIGINT);
13904       }
13905       break;
13906
13907     case AngelSVC_Reason_Read:
13908     case AngelSVC_Reason_FLen:
13909     case AngelSVC_Reason_Seek:
13910     case AngelSVC_Reason_Remove:
13911     case AngelSVC_Reason_Time:
13912     case AngelSVC_Reason_System:
13913     case AngelSVC_Reason_Rename:
13914     case AngelSVC_Reason_Elapsed:
13915     default:
13916       TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13917                      aarch64_get_reg_u32 (cpu, 0, NO_SP));
13918       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13919                        sim_stopped, SIM_SIGTRAP);
13920     }
13921
13922   aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13923 }
13924
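/* Editor's note: the Angel calls handled above follow a common
   convention -- r0 holds the reason code and r1 points to a parameter
   block in simulated memory.  For AngelSVC_Reason_Write that block is
   three 64 bit words, as read by the code above.  A sketch of the
   layout, under the hypothetical SIM_EXAMPLES guard (the struct name
   is illustrative, not part of the Angel API):  */

#ifdef SIM_EXAMPLES
struct angel_write_block
{
  uint64_t fd;    /* File descriptor, read from ptr + 0.  */
  uint64_t buf;   /* Address of the data, read from ptr + 8.  */
  uint64_t len;   /* Number of bytes to write, read from ptr + 16.  */
};
#endif /* SIM_EXAMPLES */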
13925 static void
13926 dexExcpnGen (sim_cpu *cpu)
13927 {
13928   /* instr[31:24] = 11010100
13929      instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13930                           010 ==> HLT, 101 ==> DBG GEN EXCPN
13931      instr[20,5]  = imm16
13932      instr[4,2]   = opc2 000 ==> OK, ow ==> UNALLOC
13933      instr[1,0]   = LL : discriminates opc  */
13934
13935   uint32_t opc = INSTR (23, 21);
13936   uint32_t imm16 = INSTR (20, 5);
13937   uint32_t opc2 = INSTR (4, 2);
13938   uint32_t LL;
13939
13940   NYI_assert (31, 24, 0xd4);
13941
13942   if (opc2 != 0)
13943     HALT_UNALLOC;
13944
13945   LL = INSTR (1, 0);
13946
13947   /* We only implement HLT and BRK for now.  */
13948   if (opc == 1 && LL == 0)
13949     {
13950       TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13951       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13952                        sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13953     }
13954
13955   if (opc == 2 && LL == 0)
13956     handle_halt (cpu, imm16);
13957
13958   else if (opc == 0 || opc == 5)
13959     HALT_NYI;
13960
13961   else
13962     HALT_UNALLOC;
13963 }
13964
13965 /* Stub for accessing system registers.  */
13966
13967 static uint64_t
13968 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13969             unsigned crm, unsigned op2)
13970 {
13971   if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13972     /* DCZID_EL0 - the Data Cache Zero ID register.
13973        We do not support DC ZVA at the moment, so
13974        we return a value with the disable bit set.
13975        We implement support for the DCZID register since
13976        it is used by the C library's memset function.  */
13977     return ((uint64_t) 1) << 4;
13978
13979   if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13980     /* Cache Type Register.  */
13981     return 0x80008000UL;
13982
13983   if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13984     /* TPIDR_EL0 - thread pointer id.  */
13985     return aarch64_get_thread_id (cpu);
13986
13987   if (op1 == 3 && crm == 4 && op2 == 0)
13988     return aarch64_get_FPCR (cpu);
13989
13990   if (op1 == 3 && crm == 4 && op2 == 1)
13991     return aarch64_get_FPSR (cpu);
13992
13993   else if (op1 == 3 && crm == 2 && op2 == 0)
13994     return aarch64_get_CPSR (cpu);
13995
13996   HALT_NYI;
13997 }
13998
13999 static void
14000 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14001             unsigned crm, unsigned op2, uint64_t val)
14002 {
14003   if (op1 == 3 && crm == 4 && op2 == 0)
14004     aarch64_set_FPCR (cpu, val);
14005
14006   else if (op1 == 3 && crm == 4 && op2 == 1)
14007     aarch64_set_FPSR (cpu, val);
14008
14009   else if (op1 == 3 && crm == 2 && op2 == 0)
14010     aarch64_set_CPSR (cpu, val);
14011
14012   else
14013     HALT_NYI;
14014 }
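/* Editor's note: system_get and system_set above key off (op1, CRm, op2)
   alone for the FPCR/FPSR/NZCV cases, ignoring CRn.  Architecturally
   FPCR is encoded as op0=3, op1=3, CRn=4, CRm=4, op2=0, so an
   `mrs x0, fpcr` reaches these stubs with those field values.  A sketch
   of the round trip, under the hypothetical SIM_EXAMPLES guard:  */

#ifdef SIM_EXAMPLES
static void
fpcr_roundtrip_sketch (sim_cpu *cpu)
{
  uint64_t val;

  /* Write an arbitrary value to the FPCR and read it back through
     the sysreg stubs.  */
  system_set (cpu, 3, 3, 4, 4, 0, 0);
  val = system_get (cpu, 3, 3, 4, 4, 0);
  (void) val;
}
#endif /* SIM_EXAMPLES */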
14015
14016 static void
14017 do_mrs (sim_cpu *cpu)
14018 {
14019   /* instr[31:20] = 1101 0101 0011
14020      instr[19]    = op0
14021      instr[18,16] = op1
14022      instr[15,12] = CRn
14023      instr[11,8]  = CRm
14024      instr[7,5]   = op2
14025      instr[4,0]   = Rt  */
14026   unsigned sys_op0 = INSTR (19, 19) + 2;
14027   unsigned sys_op1 = INSTR (18, 16);
14028   unsigned sys_crn = INSTR (15, 12);
14029   unsigned sys_crm = INSTR (11, 8);
14030   unsigned sys_op2 = INSTR (7, 5);
14031   unsigned rt = INSTR (4, 0);
14032
14033   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14034   aarch64_set_reg_u64 (cpu, rt, NO_SP,
14035                        system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14036 }
14037
14038 static void
14039 do_MSR_immediate (sim_cpu *cpu)
14040 {
14041   /* instr[31:19] = 1101 0101 0000 0
14042      instr[18,16] = op1
14043      instr[15,12] = 0100
14044      instr[11,8]  = CRm
14045      instr[7,5]   = op2
14046      instr[4,0]   = 1 1111  */
14047
14048   unsigned op1 = INSTR (18, 16);
14049   /* unsigned crm = INSTR (11, 8);  */
14050   unsigned op2 = INSTR (7, 5);
14051
14052   NYI_assert (31, 19, 0x1AA0);
14053   NYI_assert (15, 12, 0x4);
14054   NYI_assert (4, 0, 0x1F);
14055
14056   if (op1 == 0)
14057     {
14058       if (op2 == 5)
14059         HALT_NYI;               /* set SPSel.  */
14060       else
14061         HALT_UNALLOC;
14062     }
14063   else if (op1 == 3)
14064     {
14065       if (op2 == 6)
14066         HALT_NYI;               /* set DAIFset.  */
14067       else if (op2 == 7)
14068         HALT_NYI;               /* set DAIFclr.  */
14069       else
14070         HALT_UNALLOC;
14071     }
14072   else
14073     HALT_UNALLOC;
14074 }
14075
14076 static void
14077 do_MSR_reg (sim_cpu *cpu)
14078 {
14079   /* instr[31:20] = 1101 0101 0001
14080      instr[19]    = op0
14081      instr[18,16] = op1
14082      instr[15,12] = CRn
14083      instr[11,8]  = CRm
14084      instr[7,5]   = op2
14085      instr[4,0]   = Rt  */
14086
14087   unsigned sys_op0 = INSTR (19, 19) + 2;
14088   unsigned sys_op1 = INSTR (18, 16);
14089   unsigned sys_crn = INSTR (15, 12);
14090   unsigned sys_crm = INSTR (11, 8);
14091   unsigned sys_op2 = INSTR (7, 5);
14092   unsigned rt = INSTR (4, 0);
14093
14094   NYI_assert (31, 20, 0xD51);
14095
14096   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14097   system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14098               aarch64_get_reg_u64 (cpu, rt, NO_SP));
14099 }
14100
14101 static void
14102 do_SYS (sim_cpu *cpu)
14103 {
14104   /* instr[31,19] = 1101 0101 0000 1
14105      instr[18,16] = op1
14106      instr[15,12] = CRn
14107      instr[11,8]  = CRm
14108      instr[7,5]   = op2
14109      instr[4,0]   = Rt  */
14110   NYI_assert (31, 19, 0x1AA1);
14111
14112   /* FIXME: For now we just silently accept system ops.  */
14113 }
14114
14115 static void
14116 dexSystem (sim_cpu *cpu)
14117 {
14118   /* instr[31:22] = 1101 01010 0
14119      instr[21]    = L
14120      instr[20,19] = op0
14121      instr[18,16] = op1
14122      instr[15,12] = CRn
14123      instr[11,8]  = CRm
14124      instr[7,5]   = op2
14125      instr[4,0]   = uimm5  */
14126
14127   /* We are interested in HINT, DSB, DMB and ISB
14128
14129      Hint #0 encodes NOOP (this is the only hint we care about)
14130      L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14131      CRm != 0000 OR op2 == 000 OR op2 > 101
14132
14133      DSB, DMB, ISB are data synchronization barrier, data memory
14134      barrier and instruction synchronization barrier, respectively, where
14135
14136      L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14137      op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14138      CRm<3:2> ==> domain, CRm<1:0> ==> types,
14139      domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14140               10 ==> InnerShareable, 11 ==> FullSystem
14141      types : 01 ==> Reads, 10 ==> Writes,
14142              11 ==> All, 00 ==> All (domain == FullSystem).  */
14143
14144   unsigned rt = INSTR (4, 0);
14145
14146   NYI_assert (31, 22, 0x354);
14147
14148   switch (INSTR (21, 12))
14149     {
14150     case 0x032:
14151       if (rt == 0x1F)
14152         {
14153           /* NOP has CRm != 0000 OR
14154              (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
14155           uint32_t crm = INSTR (11, 8);
14156           uint32_t op2 = INSTR (7, 5);
14157
14158           if (crm != 0 || (op2 == 0 || op2 > 5))
14159             {
14160               /* Actually call nop method so we can reimplement it later.  */
14161               nop (cpu);
14162               return;
14163             }
14164         }
14165       HALT_NYI;
14166
14167     case 0x033:
14168       {
14169         uint32_t op2 = INSTR (7, 5);
14170
14171         switch (op2)
14172           {
14173           case 2: HALT_NYI;
14174           case 4: dsb (cpu); return;
14175           case 5: dmb (cpu); return;
14176           case 6: isb (cpu); return;
14177           default: HALT_UNALLOC;
14178           }
14179       }
14180
14181     case 0x3B0:
14182     case 0x3B4:
14183     case 0x3BD:
14184       do_mrs (cpu);
14185       return;
14186
14187     case 0x0B7:
14188       do_SYS (cpu); /* DC is an alias of SYS.  */
14189       return;
14190
14191     default:
14192       if (INSTR (21, 20) == 0x1)
14193         do_MSR_reg (cpu);
14194       else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14195         do_MSR_immediate (cpu);
14196       else
14197         HALT_NYI;
14198       return;
14199     }
14200 }
14201
14202 static void
14203 dexBr (sim_cpu *cpu)
14204 {
14205   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14206      assert  group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14207      bits [31,29] of a BrExSys are the secondary dispatch vector.  */
14208   uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14209
14210   switch (group2)
14211     {
14212     case BR_IMM_000:
14213       return dexBranchImmediate (cpu);
14214
14215     case BR_IMMCMP_001:
14216       /* Compare has bit 25 clear while test has it set.  */
14217       if (!INSTR (25, 25))
14218         dexCompareBranchImmediate (cpu);
14219       else
14220         dexTestBranchImmediate (cpu);
14221       return;
14222
14223     case BR_IMMCOND_010:
14224       /* This is a conditional branch if bit 25 is clear otherwise
14225          unallocated.  */
14226       if (!INSTR (25, 25))
14227         dexCondBranchImmediate (cpu);
14228       else
14229         HALT_UNALLOC;
14230       return;
14231
14232     case BR_UNALLOC_011:
14233       HALT_UNALLOC;
14234
14235     case BR_IMM_100:
14236       dexBranchImmediate (cpu);
14237       return;
14238
14239     case BR_IMMCMP_101:
14240       /* Compare has bit 25 clear while test has it set.  */
14241       if (!INSTR (25, 25))
14242         dexCompareBranchImmediate (cpu);
14243       else
14244         dexTestBranchImmediate (cpu);
14245       return;
14246
14247     case BR_REG_110:
14248       /* Unconditional branch reg has bit 25 set.  */
14249       if (INSTR (25, 25))
14250         dexBranchRegister (cpu);
14251
14252       /* This includes Excpn Gen, System and unalloc operations.
14253          We need to decode the Excpn Gen operation BRK so we can plant
14254          debugger entry points.
14255          Excpn Gen operations have instr [24] = 0.
14256          We need to decode at least one of the System operations NOP
14257          which is an alias for HINT #0.
14258          System operations have instr [24,22] = 100.  */
14259       else if (INSTR (24, 24) == 0)
14260         dexExcpnGen (cpu);
14261
14262       else if (INSTR (24, 22) == 4)
14263         dexSystem (cpu);
14264
14265       else
14266         HALT_UNALLOC;
14267
14268       return;
14269
14270     case BR_UNALLOC_111:
14271       HALT_UNALLOC;
14272
14273     default:
14274       /* Should never reach here.  */
14275       HALT_NYI;
14276     }
14277 }
14278
14279 static void
14280 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14281 {
14282   /* We need to check if gdb wants to break in here.  */
14283   /* checkBreak (cpu);  */
14284
14285   uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14286
14287   switch (group)
14288     {
14289     case GROUP_PSEUDO_0000:  dexPseudo (cpu); break;
14290     case GROUP_LDST_0100:    dexLdSt (cpu); break;
14291     case GROUP_DPREG_0101:   dexDPReg (cpu); break;
14292     case GROUP_LDST_0110:    dexLdSt (cpu); break;
14293     case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14294     case GROUP_DPIMM_1000:   dexDPImm (cpu); break;
14295     case GROUP_DPIMM_1001:   dexDPImm (cpu); break;
14296     case GROUP_BREXSYS_1010: dexBr (cpu); break;
14297     case GROUP_BREXSYS_1011: dexBr (cpu); break;
14298     case GROUP_LDST_1100:    dexLdSt (cpu); break;
14299     case GROUP_DPREG_1101:   dexDPReg (cpu); break;
14300     case GROUP_LDST_1110:    dexLdSt (cpu); break;
14301     case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14302
14303     case GROUP_UNALLOC_0001:
14304     case GROUP_UNALLOC_0010:
14305     case GROUP_UNALLOC_0011:
14306       HALT_UNALLOC;
14307
14308     default:
14309       /* Should never reach here.  */
14310       HALT_NYI;
14311     }
14312 }
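/* Editor's note: the GROUP_* names above suggest that dispatchGroup
   extracts the four major-opcode bits instr[28,25], which is how the
   architecture's top-level encoding table is organised.  That is an
   assumption about code defined elsewhere in the simulator; a sketch of
   the presumed extraction, under the hypothetical SIM_EXAMPLES guard:  */

#ifdef SIM_EXAMPLES
#include <assert.h>
static void
dispatch_group_check (void)
{
  /* A BL instruction (0x94000000) has instr[28,25] == 0b1010,
     i.e. the BREXSYS_1010 group.  */
  uint32_t instr = 0x94000000;
  assert (((instr >> 25) & 0xF) == 0xA);
}
#endif /* SIM_EXAMPLES */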
14313
14314 static bfd_boolean
14315 aarch64_step (sim_cpu *cpu)
14316 {
14317   uint64_t pc = aarch64_get_PC (cpu);
14318
14319   if (pc == TOP_LEVEL_RETURN_PC)
14320     return FALSE;
14321
14322   aarch64_set_next_PC (cpu, pc + 4);
14323
14324   /* Code is always little-endian.  */
14325   sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14326                         & aarch64_get_instr (cpu), pc, 4);
14327   aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14328
14329   TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14330               aarch64_get_instr (cpu));
14331   TRACE_DISASM (cpu, pc);
14332
14333   aarch64_decode_and_execute (cpu, pc);
14334
14335   return TRUE;
14336 }
14337
14338 void
14339 aarch64_run (SIM_DESC sd)
14340 {
14341   sim_cpu *cpu = STATE_CPU (sd, 0);
14342
14343   while (aarch64_step (cpu))
14344     {
14345       aarch64_update_PC (cpu);
14346
14347       if (sim_events_tick (sd))
14348         sim_events_process (sd);
14349     }
14350
14351   sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14352                    sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14353 }
14354
14355 void
14356 aarch64_init (sim_cpu *cpu, uint64_t pc)
14357 {
14358   uint64_t sp = aarch64_get_stack_start (cpu);
14359
14360   /* Install SP, FP and PC and set LR to the TOP_LEVEL_RETURN_PC
14361      sentinel so we can detect a top-level return.  */
14362   aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14363   aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14364   aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14365   aarch64_set_next_PC (cpu, pc);
14366   aarch64_update_PC (cpu);
14367   aarch64_init_LIT_table ();
14368 }