1//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the resources required by P9 instructions. This is part of 10// the P9 processor model used for instruction scheduling. This file should 11// contain all the instructions that may be used on Power 9. This is not 12// just instructions that are new on Power 9 but also instructions that were 13// available on earlier architectures and are still used in Power 9. 14// 15// The makeup of the P9 CPU is modeled as follows: 16// - Each CPU is made up of two superslices. 17// - Each superslice is made up of two slices. Therefore, there are 4 slices 18// for each CPU. 19// - Up to 6 instructions can be dispatched to each CPU. Three per superslice. 20// - Each CPU has: 21// - One CY (Crypto) unit P9_CY_* 22// - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_* 23// - Two PM (Permute) units. One on each superslice. P9_PM_* 24// - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_* 25// - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_* 26// - Four DP (Floating Point) units. One on each slice. P9_DP_* 27// This also includes fixed point multiply add. 28// - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_* 29// - Four Load/Store Queues. P9_LS_* 30// - Each set of instructions will require a number of these resources. 31//===----------------------------------------------------------------------===// 32 33// Two cycle ALU vector operation that uses an entire superslice. 34// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines 35// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice. 
36def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 37 (instrs 38 (instregex "VADDU(B|H|W|D)M$"), 39 (instregex "VAND(C)?$"), 40 (instregex "VEXTS(B|H|W)2(D|W)(s)?$"), 41 (instregex "V_SET0(B|H)?$"), 42 (instregex "VS(R|L)(B|H|W|D)$"), 43 (instregex "VSUBU(B|H|W|D)M$"), 44 (instregex "VPOPCNT(B|H)$"), 45 (instregex "VRL(B|H|W|D)$"), 46 (instregex "VSRA(B|H|W|D)$"), 47 (instregex "XV(N)?ABS(D|S)P$"), 48 (instregex "XVCPSGN(D|S)P$"), 49 (instregex "XV(I|X)EXP(D|S)P$"), 50 (instregex "VRL(D|W)(MI|NM)$"), 51 (instregex "VMRG(E|O)W$"), 52 MTVSRDD, 53 VEQV, 54 VNAND, 55 VNEGD, 56 VNEGW, 57 VNOR, 58 VOR, 59 VORC, 60 VSEL, 61 VXOR, 62 XVNEGDP, 63 XVNEGSP, 64 XXLAND, 65 XXLANDC, 66 XXLEQV, 67 XXLEQVOnes, 68 XXLNAND, 69 XXLNOR, 70 XXLOR, 71 XXLORf, 72 XXLORC, 73 XXLXOR, 74 XXLXORdpz, 75 XXLXORspz, 76 XXLXORz, 77 XXSEL, 78 XSABSQP, 79 XSCPSGNQP, 80 XSIEXPQP, 81 XSNABSQP, 82 XSNEGQP, 83 XSXEXPQP 84)>; 85 86// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a 87// single slice. However, since it is Restricted, it requires all 3 dispatches 88// (DISP) for that superslice. 89def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C], 90 (instrs 91 (instregex "TABORT(D|W)C(I)?$"), 92 (instregex "MTFSB(0|1)$"), 93 (instregex "MFFSC(D)?RN(I)?$"), 94 (instregex "CMPRB(8)?$"), 95 (instregex "TD(I)?$"), 96 (instregex "TW(I)?$"), 97 (instregex "FCMP(O|U)(S|D)$"), 98 (instregex "XSTSTDC(S|D)P$"), 99 FTDIV, 100 FTSQRT, 101 CMPEQB 102)>; 103 104// Standard Dispatch ALU operation for 3 cycles. Only one slice and one dispatch slot used. 105def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C], 106 (instrs 107 (instregex "XSMAX(C|J)?DP$"), 108 (instregex "XSMIN(C|J)?DP$"), 109 (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"), 110 (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"), 111 (instregex "POPCNT(D|W)$"), 112 (instregex "CMPB(8)?$"), 113 (instregex "SETB(8)?$"), 114 XSTDIVDP, 115 XSTSQRTDP, 116 XSXSIGDP, 117 XSCVSPDPN, 118 BPERMD 119)>; 120 121// Standard Dispatch ALU operation for 2 cycles. 
Only one slice used. 122def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], 123 (instrs 124 (instregex "S(L|R)D$"), 125 (instregex "SRAD(I)?$"), 126 (instregex "EXTSWSLI_32_64$"), 127 (instregex "MFV(S)?RD$"), 128 (instregex "MTV(S)?RD$"), 129 (instregex "MTV(S)?RW(A|Z)$"), 130 (instregex "CMP(WI|LWI|W|LW)(8)?$"), 131 (instregex "CMP(L)?D(I)?$"), 132 (instregex "SUBF(I)?C(8)?(O)?$"), 133 (instregex "ANDI(S)?(8)?(_rec)?$"), 134 (instregex "ADDC(8)?(O)?$"), 135 (instregex "ADDIC(8)?(_rec)?$"), 136 (instregex "ADD(8|4)(O)?(_rec)?$"), 137 (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"), 138 (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"), 139 (instregex "NEG(8)?(O)?(_rec)?$"), 140 (instregex "POPCNTB$"), 141 (instregex "ADD(I|IS)?(8)?$"), 142 (instregex "LI(S)?(8)?$"), 143 (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"), 144 (instregex "NAND(8)?(_rec)?$"), 145 (instregex "AND(C)?(8)?(_rec)?$"), 146 (instregex "NOR(8)?(_rec)?$"), 147 (instregex "OR(C)?(8)?(_rec)?$"), 148 (instregex "EQV(8)?(_rec)?$"), 149 (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"), 150 (instregex "ADD(4|8)(TLS)?(_)?$"), 151 (instregex "NEG(8)?(O)?$"), 152 (instregex "ADDI(S)?toc(HA|L)(8)?$"), 153 COPY, 154 MCRF, 155 MCRXRX, 156 XSNABSDP, 157 XSXEXPDP, 158 XSABSDP, 159 XSNEGDP, 160 XSCPSGNDP, 161 MFVSRWZ, 162 MFVRWZ, 163 EXTSWSLI, 164 SRADI_32, 165 RLDIC, 166 RFEBB, 167 LA, 168 TBEGIN, 169 TRECHKPT, 170 NOP, 171 WAIT 172)>; 173 174// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a 175// single slice. However, since it is Restricted, it requires all 3 dispatches 176// (DISP_3SLOTS) for that superslice. 
177def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C], 178 (instrs 179 (instregex "RLDC(L|R)$"), 180 (instregex "RLWIMI(8)?$"), 181 (instregex "RLDIC(L|R)(_32)?(_64)?$"), 182 (instregex "M(F|T)OCRF(8)?$"), 183 (instregex "CR(6)?(UN)?SET$"), 184 (instregex "CR(N)?(OR|AND)(C)?$"), 185 (instregex "S(L|R)W(8)?$"), 186 (instregex "RLW(INM|NM)(8)?$"), 187 (instregex "F(N)?ABS(D|S)$"), 188 (instregex "FNEG(D|S)$"), 189 (instregex "FCPSGN(D|S)$"), 190 (instregex "SRAW(I)?$"), 191 (instregex "ISEL(8)?$"), 192 RLDIMI, 193 XSIEXPDP, 194 FMR, 195 CREQV, 196 CRXOR, 197 TRECLAIM, 198 TSR, 199 TABORT 200)>; 201 202// Three cycle ALU vector operation that uses an entire superslice. 203// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines 204// (EXECE, EXECO) and 1 dispatch (DISP_1C) to the given superslice. 205def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 206 (instrs 207 (instregex "M(T|F)VSCR$"), 208 (instregex "VCMPNEZ(B|H|W)$"), 209 (instregex "VCMPEQU(B|H|W|D)$"), 210 (instregex "VCMPNE(B|H|W)$"), 211 (instregex "VABSDU(B|H|W)$"), 212 (instregex "VADDU(B|H|W)S$"), 213 (instregex "VAVG(S|U)(B|H|W)$"), 214 (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"), 215 (instregex "VCMPBFP(_rec)?$"), 216 (instregex "VC(L|T)Z(B|H|W|D)$"), 217 (instregex "VADDS(B|H|W)S$"), 218 (instregex "V(MIN|MAX)FP$"), 219 (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"), 220 VBPERMD, 221 VADDCUW, 222 VPOPCNTW, 223 VPOPCNTD, 224 VPRTYBD, 225 VPRTYBW, 226 VSHASIGMAD, 227 VSHASIGMAW, 228 VSUBSBS, 229 VSUBSHS, 230 VSUBSWS, 231 VSUBUBS, 232 VSUBUHS, 233 VSUBUWS, 234 VSUBCUW, 235 VCMPGTSB, 236 VCMPGTSB_rec, 237 VCMPGTSD, 238 VCMPGTSD_rec, 239 VCMPGTSH, 240 VCMPGTSH_rec, 241 VCMPGTSW, 242 VCMPGTSW_rec, 243 VCMPGTUB, 244 VCMPGTUB_rec, 245 VCMPGTUD, 246 VCMPGTUD_rec, 247 VCMPGTUH, 248 VCMPGTUH_rec, 249 VCMPGTUW, 250 VCMPGTUW_rec, 251 VCMPNEB_rec, 252 VCMPNEH_rec, 253 VCMPNEW_rec, 254 VCMPNEZB_rec, 255 VCMPNEZH_rec, 256 VCMPNEZW_rec, 257 VCMPEQUB_rec, 258 VCMPEQUD_rec, 259 VCMPEQUH_rec, 
260 VCMPEQUW_rec, 261 XVCMPEQDP, 262 XVCMPEQDP_rec, 263 XVCMPEQSP, 264 XVCMPEQSP_rec, 265 XVCMPGEDP, 266 XVCMPGEDP_rec, 267 XVCMPGESP, 268 XVCMPGESP_rec, 269 XVCMPGTDP, 270 XVCMPGTDP_rec, 271 XVCMPGTSP, 272 XVCMPGTSP_rec, 273 XVMAXDP, 274 XVMAXSP, 275 XVMINDP, 276 XVMINSP, 277 XVTDIVDP, 278 XVTDIVSP, 279 XVTSQRTDP, 280 XVTSQRTSP, 281 XVTSTDCDP, 282 XVTSTDCSP, 283 XVXSIGDP, 284 XVXSIGSP 285)>; 286 287// 7 cycle DP vector operation that uses an entire superslice. 288// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE, 289// EXECO) and 1 dispatch (DISP) to the given superslice. 290def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 291 (instrs 292 VADDFP, 293 VCTSXS, 294 VCTSXS_0, 295 VCTUXS, 296 VCTUXS_0, 297 VEXPTEFP, 298 VLOGEFP, 299 VMADDFP, 300 VMHADDSHS, 301 VNMSUBFP, 302 VREFP, 303 VRFIM, 304 VRFIN, 305 VRFIP, 306 VRFIZ, 307 VRSQRTEFP, 308 VSUBFP, 309 XVADDDP, 310 XVADDSP, 311 XVCVDPSP, 312 XVCVDPSXDS, 313 XVCVDPSXWS, 314 XVCVDPUXDS, 315 XVCVDPUXWS, 316 XVCVHPSP, 317 XVCVSPDP, 318 XVCVSPHP, 319 XVCVSPSXDS, 320 XVCVSPSXWS, 321 XVCVSPUXDS, 322 XVCVSPUXWS, 323 XVCVSXDDP, 324 XVCVSXDSP, 325 XVCVSXWDP, 326 XVCVSXWSP, 327 XVCVUXDDP, 328 XVCVUXDSP, 329 XVCVUXWDP, 330 XVCVUXWSP, 331 XVMADDADP, 332 XVMADDASP, 333 XVMADDMDP, 334 XVMADDMSP, 335 XVMSUBADP, 336 XVMSUBASP, 337 XVMSUBMDP, 338 XVMSUBMSP, 339 XVMULDP, 340 XVMULSP, 341 XVNMADDADP, 342 XVNMADDASP, 343 XVNMADDMDP, 344 XVNMADDMSP, 345 XVNMSUBADP, 346 XVNMSUBASP, 347 XVNMSUBMDP, 348 XVNMSUBMSP, 349 XVRDPI, 350 XVRDPIC, 351 XVRDPIM, 352 XVRDPIP, 353 XVRDPIZ, 354 XVREDP, 355 XVRESP, 356 XVRSPI, 357 XVRSPIC, 358 XVRSPIM, 359 XVRSPIP, 360 XVRSPIZ, 361 XVRSQRTEDP, 362 XVRSQRTESP, 363 XVSUBDP, 364 XVSUBSP, 365 VCFSX, 366 VCFSX_0, 367 VCFUX, 368 VCFUX_0, 369 VMHRADDSHS, 370 VMLADDUHM, 371 VMSUMMBM, 372 VMSUMSHM, 373 VMSUMSHS, 374 VMSUMUBM, 375 VMSUMUHM, 376 VMSUMUDM, 377 VMSUMUHS, 378 VMULESB, 379 VMULESH, 380 VMULESW, 381 VMULEUB, 382 VMULEUH, 383 VMULEUW, 384 
VMULOSB, 385 VMULOSH, 386 VMULOSW, 387 VMULOUB, 388 VMULOUH, 389 VMULOUW, 390 VMULUWM, 391 VSUM2SWS, 392 VSUM4SBS, 393 VSUM4SHS, 394 VSUM4UBS, 395 VSUMSWS 396)>; 397 398// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three 399// dispatch units for the superslice. 400def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C], 401 (instrs 402 (instregex "MADD(HD|HDU|LD|LD8)$"), 403 (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$") 404)>; 405 406// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three 407// dispatch units for the superslice. 408def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C], 409 (instrs 410 FRSP, 411 (instregex "FRI(N|P|Z|M)(D|S)$"), 412 (instregex "FRE(S)?$"), 413 (instregex "FADD(S)?$"), 414 (instregex "FMSUB(S)?$"), 415 (instregex "FMADD(S)?$"), 416 (instregex "FSUB(S)?$"), 417 (instregex "FCFID(U)?(S)?$"), 418 (instregex "FCTID(U)?(Z)?$"), 419 (instregex "FCTIW(U)?(Z)?$"), 420 (instregex "FRSQRTE(S)?$"), 421 FNMADDS, 422 FNMADD, 423 FNMSUBS, 424 FNMSUB, 425 FSELD, 426 FSELS, 427 FMULS, 428 FMUL, 429 XSMADDADP, 430 XSMADDASP, 431 XSMADDMDP, 432 XSMADDMSP, 433 XSMSUBADP, 434 XSMSUBASP, 435 XSMSUBMDP, 436 XSMSUBMSP, 437 XSMULDP, 438 XSMULSP, 439 XSNMADDADP, 440 XSNMADDASP, 441 XSNMADDMDP, 442 XSNMADDMSP, 443 XSNMSUBADP, 444 XSNMSUBASP, 445 XSNMSUBMDP, 446 XSNMSUBMSP 447)>; 448 449// 7 cycle Restricted DP operation and one 3 cycle ALU operation. 450// These operations can be done in parallel. The DP is restricted so we need a 451// full 4 dispatches. 452def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, 453 DISP_3SLOTS_1C, DISP_1C], 454 (instrs 455 (instregex "FSEL(D|S)_rec$") 456)>; 457 458// 5 Cycle Restricted DP operation and one 2 cycle ALU operation for a total of 7 cycles. 459def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, 460 DISP_3SLOTS_1C, DISP_1C], 461 (instrs 462 (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$") 463)>; 464 465// 7 cycle Restricted DP operation and one 3 cycle ALU operation. 
466// These operations must be done sequentially. The DP is restricted so we need a 467// full 4 dispatches. 468def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, 469 DISP_3SLOTS_1C, DISP_1C], 470 (instrs 471 (instregex "FRI(N|P|Z|M)(D|S)_rec$"), 472 (instregex "FRE(S)?_rec$"), 473 (instregex "FADD(S)?_rec$"), 474 (instregex "FSUB(S)?_rec$"), 475 (instregex "F(N)?MSUB(S)?_rec$"), 476 (instregex "F(N)?MADD(S)?_rec$"), 477 (instregex "FCFID(U)?(S)?_rec$"), 478 (instregex "FCTID(U)?(Z)?_rec$"), 479 (instregex "FCTIW(U)?(Z)?_rec$"), 480 (instregex "FMUL(S)?_rec$"), 481 (instregex "FRSQRTE(S)?_rec$"), 482 FRSP_rec 483)>; 484 485// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit. 486def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C], 487 (instrs 488 XSADDDP, 489 XSADDSP, 490 XSCVDPHP, 491 XSCVDPSP, 492 XSCVDPSXDS, 493 XSCVDPSXDSs, 494 XSCVDPSXWS, 495 XSCVDPUXDS, 496 XSCVDPUXDSs, 497 XSCVDPUXWS, 498 XSCVDPSXWSs, 499 XSCVDPUXWSs, 500 XSCVHPDP, 501 XSCVSPDP, 502 XSCVSXDDP, 503 XSCVSXDSP, 504 XSCVUXDDP, 505 XSCVUXDSP, 506 XSRDPI, 507 XSRDPIC, 508 XSRDPIM, 509 XSRDPIP, 510 XSRDPIZ, 511 XSREDP, 512 XSRESP, 513 XSRSQRTEDP, 514 XSRSQRTESP, 515 XSSUBDP, 516 XSSUBSP, 517 XSCVDPSPN, 518 XSRSP 519)>; 520 521// Three Cycle PM operation. Only one PM unit per superslice so we use the whole 522// superslice. That includes both exec pipelines (EXECO, EXECE) and one 523// dispatch. 
524def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], 525 (instrs 526 (instregex "LVS(L|R)$"), 527 (instregex "VSPLTIS(W|H|B)$"), 528 (instregex "VSPLT(W|H|B)(s)?$"), 529 (instregex "V_SETALLONES(B|H)?$"), 530 (instregex "VEXTRACTU(B|H|W)$"), 531 (instregex "VINSERT(B|H|W|D)$"), 532 MFVSRLD, 533 MTVSRWS, 534 VBPERMQ, 535 VCLZLSBB, 536 VCTZLSBB, 537 VEXTRACTD, 538 VEXTUBLX, 539 VEXTUBRX, 540 VEXTUHLX, 541 VEXTUHRX, 542 VEXTUWLX, 543 VEXTUWRX, 544 VGBBD, 545 VMRGHB, 546 VMRGHH, 547 VMRGHW, 548 VMRGLB, 549 VMRGLH, 550 VMRGLW, 551 VPERM, 552 VPERMR, 553 VPERMXOR, 554 VPKPX, 555 VPKSDSS, 556 VPKSDUS, 557 VPKSHSS, 558 VPKSHUS, 559 VPKSWSS, 560 VPKSWUS, 561 VPKUDUM, 562 VPKUDUS, 563 VPKUHUM, 564 VPKUHUS, 565 VPKUWUM, 566 VPKUWUS, 567 VPRTYBQ, 568 VSL, 569 VSLDOI, 570 VSLO, 571 VSLV, 572 VSR, 573 VSRO, 574 VSRV, 575 VUPKHPX, 576 VUPKHSB, 577 VUPKHSH, 578 VUPKHSW, 579 VUPKLPX, 580 VUPKLSB, 581 VUPKLSH, 582 VUPKLSW, 583 XXBRD, 584 XXBRH, 585 XXBRQ, 586 XXBRW, 587 XXEXTRACTUW, 588 XXINSERTW, 589 XXMRGHW, 590 XXMRGLW, 591 XXPERM, 592 XXPERMR, 593 XXSLDWI, 594 XXSLDWIs, 595 XXSPLTIB, 596 XXSPLTW, 597 XXSPLTWs, 598 XXPERMDI, 599 XXPERMDIs, 600 VADDCUQ, 601 VADDECUQ, 602 VADDEUQM, 603 VADDUQM, 604 VMUL10CUQ, 605 VMUL10ECUQ, 606 VMUL10EUQ, 607 VMUL10UQ, 608 VSUBCUQ, 609 VSUBECUQ, 610 VSUBEUQM, 611 VSUBUQM, 612 XSCMPEXPQP, 613 XSCMPOQP, 614 XSCMPUQP, 615 XSTSTDCQP, 616 XSXSIGQP, 617 BCDCFN_rec, 618 BCDCFZ_rec, 619 BCDCPSGN_rec, 620 BCDCTN_rec, 621 BCDCTZ_rec, 622 BCDSETSGN_rec, 623 BCDS_rec, 624 BCDTRUNC_rec, 625 BCDUS_rec, 626 BCDUTRUNC_rec 627)>; 628 629// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole 630// superslice. That includes both exec pipelines (EXECO, EXECE) and one 631// dispatch (DISP_1C). 
632def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 633 (instrs 634 BCDSR_rec, 635 XSADDQP, 636 XSADDQPO, 637 XSCVDPQP, 638 XSCVQPDP, 639 XSCVQPDPO, 640 XSCVQPSDZ, 641 XSCVQPSWZ, 642 XSCVQPUDZ, 643 XSCVQPUWZ, 644 XSCVSDQP, 645 XSCVUDQP, 646 XSRQPI, 647 XSRQPIX, 648 XSRQPXP, 649 XSSUBQP, 650 XSSUBQPO 651)>; 652 653// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole 654// superslice. That includes both exec pipelines (EXECO, EXECE) and one 655// dispatch. 656def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 657 (instrs 658 BCDCTSQ_rec 659)>; 660 661// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole 662// superslice. That includes both exec pipelines (EXECO, EXECE) and one 663// dispatch. 664def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 665 (instrs 666 XSMADDQP, 667 XSMADDQPO, 668 XSMSUBQP, 669 XSMSUBQPO, 670 XSMULQP, 671 XSMULQPO, 672 XSNMADDQP, 673 XSNMADDQPO, 674 XSNMSUBQP, 675 XSNMSUBQPO 676)>; 677 678// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole 679// superslice. That includes both exec pipelines (EXECO, EXECE) and one 680// dispatch. 681def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 682 (instrs 683 BCDCFSQ_rec 684)>; 685 686// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole 687// superslice. That includes both exec pipelines (EXECO, EXECE) and one 688// dispatch. 689def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 690 (instrs 691 XSDIVQP, 692 XSDIVQPO 693)>; 694 695// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole 696// superslice. That includes both exec pipelines (EXECO, EXECE) and one 697// dispatch. 698def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], 699 (instrs 700 XSSQRTQP, 701 XSSQRTQPO 702)>; 703 704// 6 Cycle Load uses a single slice. 
705def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C], 706 (instrs 707 (instregex "LXVL(L)?") 708)>; 709 710// 5 Cycle Load uses a single slice. 711def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C], 712 (instrs 713 (instregex "LVE(B|H|W)X$"), 714 (instregex "LVX(L)?"), 715 (instregex "LXSI(B|H)ZX$"), 716 LXSDX, 717 LXVB16X, 718 LXVD2X, 719 LXVWSX, 720 LXSIWZX, 721 LXV, 722 LXVX, 723 LXSD, 724 DFLOADf64, 725 XFLOADf64, 726 LIWZX 727)>; 728 729// 4 Cycle Load uses a single slice. 730def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C], 731 (instrs 732 (instregex "DCB(F|T|ST)(EP)?$"), 733 (instregex "DCBZ(L)?(EP)?$"), 734 (instregex "DCBTST(EP)?$"), 735 (instregex "CP_COPY(8)?$"), 736 (instregex "ICBI(EP)?$"), 737 (instregex "ICBT(LS)?$"), 738 (instregex "LBARX(L)?$"), 739 (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"), 740 (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"), 741 (instregex "LH(A|B)RX(L)?(8)?$"), 742 (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), 743 (instregex "LWARX(L)?$"), 744 (instregex "LWBRX(8)?$"), 745 (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), 746 CP_ABORT, 747 DARN, 748 EnforceIEIO, 749 ISYNC, 750 MSGSYNC, 751 TLBSYNC, 752 SYNC, 753 LMW, 754 LSWI 755)>; 756 757// 4 Cycle Restricted load uses a single slice but the dispatch for the whole 758// superslice. 759def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C], 760 (instrs 761 LFIWZX, 762 LFDX, 763 LFD 764)>; 765 766// Cracked Load Instructions. 767// Load instructions that can be done in parallel. 768def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, 769 DISP_PAIR_1C], 770 (instrs 771 SLBIA, 772 SLBIE, 773 SLBMFEE, 774 SLBMFEV, 775 SLBMTE, 776 TLBIEL 777)>; 778 779// Cracked Load Instruction. 780// Requires Load (4 cycles) and ALU (2 cycles) pieces. The Load and ALU 781// operations can be run in parallel. 782def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, 783 DISP_PAIR_1C, DISP_PAIR_1C], 784 (instrs 785 (instregex "L(W|H)ZU(X)?(8)?$") 786)>; 787 788// Cracked TEND Instruction. 
789// Requires Load (4 cycles) and ALU (2 cycles) pieces. The Load and ALU 790// operations can be run in parallel. 791def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, 792 DISP_1C, DISP_1C], 793 (instrs 794 TEND 795)>; 796 797 798// Cracked Store Instruction 799// Consecutive Store and ALU instructions. The store is restricted and requires 800// three dispatches. 801def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, 802 DISP_3SLOTS_1C, DISP_1C], 803 (instrs 804 (instregex "ST(B|H|W|D)CX$") 805)>; 806 807// Cracked Load Instruction. 808// Two consecutive load operations for a total of 8 cycles. 809def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, 810 DISP_1C, DISP_1C], 811 (instrs 812 LDMX 813)>; 814 815// Cracked Load instruction. 816// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU 817// operations cannot be done at the same time and so their latencies are added. 818def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, 819 DISP_1C, DISP_1C], 820 (instrs 821 (instregex "LHA(X)?(8)?$"), 822 (instregex "CP_PASTE(8)?_rec$"), 823 (instregex "LWA(X)?(_32)?$"), 824 TCHECK 825)>; 826 827// Cracked Restricted Load instruction. 828// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU 829// operations cannot be done at the same time and so their latencies are added. 830// Full 6 dispatches are required as this is both cracked and restricted. 831def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, 832 DISP_3SLOTS_1C, DISP_3SLOTS_1C], 833 (instrs 834 LFIWAX 835)>; 836 837// Cracked Load instruction. 838// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU 839// operations cannot be done at the same time and so their latencies are added. 840// Two dispatches are required as this is a cracked instruction. 
841def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C], 842 (instrs 843 LXSIWAX, 844 LIWAX 845)>; 846 847// Cracked Load instruction. 848// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7 849// cycles. The Load and ALU operations cannot be done at the same time and so 850// their latencies are added. 851// Full 6 dispatches are required as this is a restricted instruction. 852def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C, 853 DISP_3SLOTS_1C, DISP_3SLOTS_1C], 854 (instrs 855 LFSX, 856 LFS 857)>; 858 859// Cracked Load instruction. 860// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU 861// operations cannot be done at the same time and so their latencies are added. 862// Two dispatches are required as this is a cracked instruction. 863def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C], 864 (instrs 865 LXSSP, 866 LXSSPX, 867 XFLOADf32, 868 DFLOADf32 869)>; 870 871// Cracked 3-Way Load Instruction 872// Load with two ALU operations that depend on each other 873def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, 874 DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C], 875 (instrs 876 (instregex "LHAU(X)?(8)?$"), 877 LWAUX 878)>; 879 880// Cracked Load that requires the PM resource. 881// Since the Load and the PM cannot be done at the same time the latencies are 882// added. Requires 8 cycles. Since the PM requires the full superslice we need 883// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load 884// requires the remaining 1 dispatch. 885def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, 886 DISP_1C, DISP_1C], 887 (instrs 888 LXVH8X, 889 LXVDSX, 890 LXVW4X 891)>; 892 893// Single slice Restricted store operation. The restricted operation requires 894// all three dispatches for the superslice. 
895def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C], 896 (instrs 897 (instregex "STF(S|D|IWX|SX|DX)$"), 898 (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"), 899 (instregex "STW(8)?$"), 900 (instregex "(D|X)FSTORE(f32|f64)$"), 901 (instregex "ST(W|H|D)BRX$"), 902 (instregex "ST(B|H|D)(8)?$"), 903 (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"), 904 STIWX, 905 SLBIEG, 906 STMW, 907 STSWI, 908 TLBIE 909)>; 910 911// Vector Store Instruction 912// Requires the whole superslice and therefore requires one dispatch 913// as well as both the Even and Odd exec pipelines. 914def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C], 915 (instrs 916 (instregex "STVE(B|H|W)X$"), 917 (instregex "STVX(L)?$"), 918 (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$") 919)>; 920 921// 5 Cycle DIV unit operation (the MTCTR and MTLR variants). Only one DIV unit per superslice so we use the whole 922// superslice. That includes both exec pipelines (EXECO, EXECE) and two 923// dispatches. 924def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], 925 (instrs 926 (instregex "MTCTR(8)?(loop)?$"), 927 (instregex "MTLR(8)?$") 928)>; 929 930// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole 931// superslice. That includes both exec pipelines (EXECO, EXECE) and two 932// dispatches. 933def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], 934 (instrs 935 (instregex "M(T|F)VRSAVE(v)?$"), 936 (instregex "M(T|F)PMR$"), 937 (instregex "M(T|F)TB(8)?$"), 938 (instregex "MF(SPR|CTR|LR)(8)?$"), 939 (instregex "M(T|F)MSR(D)?$"), 940 (instregex "MTSPR(8)?$") 941)>; 942 943// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole 944// superslice. That includes both exec pipelines (EXECO, EXECE) and two 945// dispatches. 946def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], 947 (instrs 948 DIVW, 949 DIVWO, 950 DIVWU, 951 DIVWUO, 952 MODSW 953)>; 954 955// 24 Cycle DIV operation. 
Only one DIV unit per superslice so we use the whole 956// superslice. That includes both exec pipelines (EXECO, EXECE) and two 957// dispatches. 958def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], 959 (instrs 960 DIVWE, 961 DIVWEO, 962 DIVD, 963 DIVDO, 964 DIVWEU, 965 DIVWEUO, 966 DIVDU, 967 DIVDUO, 968 MODSD, 969 MODUD, 970 MODUW 971)>; 972 973// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole 974// superslice. That includes both exec pipelines (EXECO, EXECE) and two 975// dispatches. 976def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], 977 (instrs 978 DIVDE, 979 DIVDEO, 980 DIVDEU, 981 DIVDEUO 982)>; 983 984// Cracked DIV and ALU operation. Requires one full slice for the ALU operation 985// and one full superslice for the DIV operation since there is only one DIV per 986// superslice. Latency of DIV plus ALU is 18. 987def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, 988 DISP_EVEN_1C, DISP_1C], 989 (instrs 990 (instregex "DIVW(U)?(O)?_rec$") 991)>; 992 993// Cracked DIV and ALU operation. Requires one full slice for the ALU operation 994// and one full superslice for the DIV operation since there is only one DIV per 995// superslice. Latency of DIV plus ALU is 26. 996def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, 997 DISP_EVEN_1C, DISP_1C], 998 (instrs 999 DIVD_rec, 1000 DIVDO_rec, 1001 DIVDU_rec, 1002 DIVDUO_rec, 1003 DIVWE_rec, 1004 DIVWEO_rec, 1005 DIVWEU_rec, 1006 DIVWEUO_rec 1007)>; 1008 1009// Cracked DIV and ALU operation. Requires one full slice for the ALU operation 1010// and one full superslice for the DIV operation since there is only one DIV per 1011// superslice. Latency of DIV plus ALU is 42. 
1012def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, 1013 DISP_EVEN_1C, DISP_1C], 1014 (instrs 1015 DIVDE_rec, 1016 DIVDEO_rec, 1017 DIVDEU_rec, 1018 DIVDEUO_rec 1019)>; 1020 1021// CR access instructions in _BrMCR, IIC_BrMCRX. 1022 1023// Cracked, restricted, ALU operations. 1024// Here the two ALU ops can actually be done in parallel and therefore the 1025// latencies are not added together. Otherwise this is like having two 1026// instructions running together on two pipelines and 6 dispatches. ALU ops are 1027// 2 cycles each. 1028def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, 1029 DISP_3SLOTS_1C, DISP_3SLOTS_1C], 1030 (instrs 1031 MTCRF, 1032 MTCRF8 1033)>; 1034 1035// Cracked ALU operations. 1036// Here the two ALU ops can actually be done in parallel and therefore the 1037// latencies are not added together. Otherwise this is like having two 1038// instructions running together on two pipelines and 2 dispatches. ALU ops are 1039// 2 cycles each. 1040def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, 1041 DISP_1C, DISP_1C], 1042 (instrs 1043 (instregex "ADDC(8)?(O)?_rec$"), 1044 (instregex "SUBFC(8)?(O)?_rec$") 1045)>; 1046 1047// Cracked ALU operations. 1048// Two ALU ops can be done in parallel. 1049// One is three cycle ALU the other is a two cycle ALU. 1050// One of the ALU ops is restricted the other is not so we have a total of 1051// 4 dispatches. 1052def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, 1053 DISP_3SLOTS_1C, DISP_1C], 1054 (instrs 1055 (instregex "F(N)?ABS(D|S)_rec$"), 1056 (instregex "FCPSGN(D|S)_rec$"), 1057 (instregex "FNEG(D|S)_rec$"), 1058 FMR_rec 1059)>; 1060 1061// Cracked ALU operations. 1062// Here the two ALU ops can actually be done in parallel and therefore the 1063// latencies are not added together. Otherwise this is like having two 1064// instructions running together on two pipelines and 2 dispatches. 1065// ALU ops are 3 cycles each. 
1066def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, 1067 DISP_1C, DISP_1C], 1068 (instrs 1069 MCRFS 1070)>; 1071 1072// Cracked Restricted ALU operations. 1073// Here the two ALU ops can actually be done in parallel and therefore the 1074// latencies are not added together. Otherwise this is like having two 1075// instructions running together on two pipelines and 6 dispatches. 1076// ALU ops are 3 cycles each. 1077def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, 1078 DISP_3SLOTS_1C, DISP_3SLOTS_1C], 1079 (instrs 1080 (instregex "MTFSF(b|_rec)?$"), 1081 (instregex "MTFSFI(_rec)?$") 1082)>; 1083 1084// Cracked instruction made of two ALU ops. 1085// The two ops cannot be done in parallel. 1086// One of the ALU ops is restricted and takes 3 dispatches. 1087def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, 1088 DISP_3SLOTS_1C, DISP_1C], 1089 (instrs 1090 (instregex "RLD(I)?C(R|L)_rec$"), 1091 (instregex "RLW(IMI|INM|NM)(8)?_rec$"), 1092 (instregex "SLW(8)?_rec$"), 1093 (instregex "SRAW(I)?_rec$"), 1094 (instregex "SRW(8)?_rec$"), 1095 RLDICL_32_rec, 1096 RLDIMI_rec 1097)>; 1098 1099// Cracked instruction made of two ALU ops. 1100// The two ops cannot be done in parallel. 1101// Both of the ALU ops are restricted and take 3 dispatches. 1102def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C, 1103 DISP_3SLOTS_1C, DISP_3SLOTS_1C], 1104 (instrs 1105 (instregex "MFFS(L|CE|_rec)?$") 1106)>; 1107 1108// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a 1109// total of 6 cycles. All of the ALU operations are also restricted so each 1110// takes 3 dispatches for a total of 9. 1111def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C, 1112 DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C], 1113 (instrs 1114 (instregex "MFCR(8)?$") 1115)>; 1116 1117// Cracked instruction made of two ALU ops. 1118// The two ops cannot be done in parallel. 
1119def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C], 1120 (instrs 1121 (instregex "EXTSWSLI_32_64_rec$"), 1122 (instregex "SRAD(I)?_rec$"), 1123 EXTSWSLI_rec, 1124 SLD_rec, 1125 SRD_rec, 1126 RLDIC_rec 1127)>; 1128 1129// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. 1130def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C], 1131 (instrs 1132 FDIV 1133)>; 1134 1135// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. 1136def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C, 1137 DISP_3SLOTS_1C, DISP_1C], 1138 (instrs 1139 FDIV_rec 1140)>; 1141 1142// 36 Cycle DP Instruction. 1143// Instruction can be done on a single slice. 1144def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C], 1145 (instrs 1146 XSSQRTDP 1147)>; 1148 1149// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. 1150def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C], 1151 (instrs 1152 FSQRT 1153)>; 1154 1155// 36 Cycle DP Vector Instruction. Uses both the even (DPE) and odd (DPO) DP units. 1156def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C, 1157 DISP_1C], 1158 (instrs 1159 XVSQRTDP 1160)>; 1161 1162// 27 Cycle DP Vector Instruction. 1163def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C, 1164 DISP_1C], 1165 (instrs 1166 XVSQRTSP 1167)>; 1168 1169// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. 1170def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C, 1171 DISP_3SLOTS_1C, DISP_1C], 1172 (instrs 1173 FSQRT_rec 1174)>; 1175 1176// 26 Cycle DP Instruction. 1177def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C], 1178 (instrs 1179 XSSQRTSP 1180)>; 1181 1182// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. 1183def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C], 1184 (instrs 1185 FSQRTS 1186)>; 1187 1188// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. 
1189def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C, 1190 DISP_3SLOTS_1C, DISP_1C], 1191 (instrs 1192 FSQRTS_rec 1193)>; 1194 1195// 33 Cycle DP Instruction. Takes one slice and 1 dispatch. 1196def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C], 1197 (instrs 1198 XSDIVDP 1199)>; 1200 1201// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. 1202def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C], 1203 (instrs 1204 FDIVS 1205)>; 1206 1207// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. 1208def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C, 1209 DISP_3SLOTS_1C, DISP_1C], 1210 (instrs 1211 FDIVS_rec 1212)>; 1213 1214// 22 Cycle DP Instruction. Takes one slice and 1 dispatch. 1215def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C], 1216 (instrs 1217 XSDIVSP 1218)>; 1219 1220// 24 Cycle DP Vector Instruction. Takes one full superslice. 1221// Includes both EXECE, EXECO pipelines and 1 dispatch for the given 1222// superslice. 1223def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C, 1224 DISP_1C], 1225 (instrs 1226 XVDIVSP 1227)>; 1228 1229// 33 Cycle DP Vector Instruction. Takes one full superslice. 1230// Includes both EXECE, EXECO pipelines and 1 dispatch for the given 1231// superslice. 1232def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C, 1233 DISP_1C], 1234 (instrs 1235 XVDIVDP 1236)>; 1237 1238// Instruction cracked into three pieces. One Load and two ALU operations. 1239// The Load and one of the ALU ops cannot be run at the same time and so the 1240// latencies are added together for 7 cycles. The remaining ALU is 2 cycles. 1241// Both the load and the ALU that depends on it are restricted and so they take 1242// a total of 6 dispatches. The final dispatch comes from the second ALU op. 1243// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load. 
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(SU|SUX)$")
)>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted, hence the 3-slot dispatch (DISP_3SLOTS_1C) next to the
// single dispatch (DISP_1C).
// NOTE(review): two IP_EXEC_1C pipelines are listed although only one ALU op
// is described -- confirm this is intended.
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "STF(S|D)U(X)?$"),
    (instregex "ST(B|H|W|D)U(X)?(8)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is not
// restricted here; two paired dispatches (DISP_PAIR_1C) are listed.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load (DISP_3SLOTS_1C) while
// the remaining one (DISP_1C) is from the ALU. The AGEN pipeline is from the
// load and the EXEC pipeline is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(DU|DUX)$")
)>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX
)>;

// Branch Instructions

// Two Cycle Branch. Only the branch write (P9_BR_2C) and the branch dispatch
// (DISP_BR_1C) are listed; no EXEC or AGEN pipeline is used.
// The regexes are anchored with '$' so that each one matches only the listed
// suffix variants of the opcode name.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
      (instrs
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS|_NOP)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
    (instregex "BLA(8|8_NOP)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?$"),
    B,
    BA,
    BC,
    BCC,
    BCCA,
    BCL,
    BCLalways,
    BCLn,
    BCTRL8_LDinto_toc,
    BCTRL_LWZinto_toc,
    BCn,
    CTRL_DEP
)>;

// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
      (instrs
    ADDPCIS
)>;

// Special Extracted Instructions For Atomics

// Atomic Load
// Lists five LS writes (two 1-cycle, three 4-cycle), two EXEC pipelines, five
// AGEN pipelines and six dispatch entries, two of which are 3-slot dispatches.
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "L(D|W)AT$")
)>;

// Atomic Store
// Lists three LS writes (one 1-cycle, two 4-cycle), one EXEC pipeline, three
// AGEN pipelines and three dispatch entries, one of which is a 3-slot dispatch.
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(D|W)AT$")
)>;

// Signal Processing Engine (SPE) Instructions
// These instructions are not supported on Power 9, so they get an empty write
// list and are flagged with Unsupported = 1.
def : InstRW<[],
      (instrs
    BRINC,
    EVABS,
    EVEQV,
    EVMRA,
    EVNAND,
    EVNEG,
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
)> { let Unsupported = 1; }

// General Instructions without scheduling support.
// Like the SPE list above, these get an empty write list and are flagged with
// Unsupported = 1 rather than being given resources.
def : InstRW<[],
      (instrs
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    (instregex "HASH(ST|STP|CHK|CHKP)$"),
    ATTN,
    CLRBHRB,
    MFBHRBE,
    MBAR,
    MSYNC,
    SLBSYNC,
    SLBFEE_rec,
    NAP,
    STOP,
    TRAP,
    RFCI,
    RFDI,
    RFMCI,
    SC,
    DCBA,
    DCBI,
    DCCCI,
    ICCCI
)> { let Unsupported = 1; }