//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
// - Each CPU is made up of two superslices.
// - Each superslice is made up of two slices. Therefore, there are 4 slices
//   for each CPU.
// - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
// - Each CPU has:
//   - One CY (Crypto) unit P9_CY_*
//   - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//   - Two PM (Permute) units. One on each superslice. P9_PM_*
//   - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//   - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//   - Four DP (Floating Point) units. One on each slice. P9_DP_*
//     This also includes fixed point multiply add.
//   - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//   - Four Load/Store Queues. P9_LS_*
// - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD, VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR, XVNEGDP,
    XVNEGSP, XXLAND, XXLANDC, XXLEQV, XXLEQVOnes, XXLNAND, XXLNOR, XXLOR,
    XXLORf, XXLORC, XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz, XXSEL, XSABSQP,
    XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP, XSXEXPQP
  )>;

// 3 cycle ALU operation with Restricted dispatch. It executes on a single
// slice, but because it is Restricted it takes all 3 dispatches (DISP) of
// that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV, FTSQRT, CMPEQB
  )>;

// 3 cycle ALU operation with Standard dispatch. Uses a single slice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, BPERMD
  )>;

// 2 cycle ALU operation with Standard dispatch. Uses a single slice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "POPCNTB8$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    (instregex "LA(8)?$"),
    COPY, MCRF, MCRXRX, XSNABSDP, XSNABSDPs, XSXEXPDP, XSABSDP, XSNEGDP,
    XSCPSGNDP, MFVSRWZ, MFVRWZ, EXTSWSLI, SRADI_32, RLDIC, RFEBB, TBEGIN,
    TRECHKPT, NOP, WAIT
  )>;

// 2 cycle ALU operation with Restricted dispatch. It executes on a single
// slice, but because it is Restricted it takes all 3 dispatches (DISP) of
// that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(8)?$"),
    (instregex "SRAWI(8)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI, XSIEXPDP, FMR, CREQV, CRNOT, CRXOR, TRECLAIM, TSR, TABORT
  )>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW, VSHASIGMAD,
    VSHASIGMAW, VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS, VSUBCUW,
    VCMPGTSB, VCMPGTSB_rec, VCMPGTSD, VCMPGTSD_rec, VCMPGTSH, VCMPGTSH_rec,
    VCMPGTSW, VCMPGTSW_rec, VCMPGTUB, VCMPGTUB_rec, VCMPGTUD, VCMPGTUD_rec,
    VCMPGTUH, VCMPGTUH_rec, VCMPGTUW, VCMPGTUW_rec, VCMPNEB_rec, VCMPNEH_rec,
    VCMPNEW_rec, VCMPNEZB_rec, VCMPNEZH_rec, VCMPNEZW_rec, VCMPEQUB_rec,
    VCMPEQUD_rec, VCMPEQUH_rec, VCMPEQUW_rec, XVCMPEQDP, XVCMPEQDP_rec,
    XVCMPEQSP, XVCMPEQSP_rec, XVCMPGEDP, XVCMPGEDP_rec, XVCMPGESP,
    XVCMPGESP_rec, XVCMPGTDP, XVCMPGTDP_rec, XVCMPGTSP, XVCMPGTSP_rec,
    XVMAXDP, XVMAXSP, XVMINDP, XVMINSP, XVTDIVDP, XVTDIVSP, XVTSQRTDP,
    XVTSQRTSP, XVTSTDCDP, XVTSTDCSP, XVXSIGDP, XVXSIGSP
  )>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    VADDFP, VCTSXS, VCTSXS_0, VCTUXS, VCTUXS_0, VEXPTEFP, VLOGEFP, VMADDFP,
    VMHADDSHS, VNMSUBFP, VREFP, VRFIM, VRFIN, VRFIP, VRFIZ, VRSQRTEFP, VSUBFP,
    XVADDDP, XVADDSP, XVCVDPSP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS,
    XVCVDPUXWS, XVCVHPSP, XVCVSPDP, XVCVSPHP, XVCVSPSXDS, XVCVSPSXWS,
    XVCVSPUXDS, XVCVSPUXWS, XVCVSXDDP, XVCVSXDSP, XVCVSXWDP, XVCVSXWSP,
    XVCVUXDDP, XVCVUXDSP, XVCVUXWDP, XVCVUXWSP, XVMADDADP, XVMADDASP,
    XVMADDMDP, XVMADDMSP, XVMSUBADP, XVMSUBASP, XVMSUBMDP, XVMSUBMSP, XVMULDP,
    XVMULSP, XVNMADDADP, XVNMADDASP, XVNMADDMDP, XVNMADDMSP, XVNMSUBADP,
    XVNMSUBASP, XVNMSUBMDP, XVNMSUBMSP, XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP,
    XVRDPIZ, XVREDP, XVRESP, XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ,
    XVRSQRTEDP, XVRSQRTESP, XVSUBDP, XVSUBSP, VCFSX, VCFSX_0, VCFUX, VCFUX_0,
    VMHRADDSHS, VMLADDUHM, VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUHM,
    VMSUMUDM, VMSUMUHS, VMULESB, VMULESH, VMULESW, VMULEUB, VMULEUH, VMULEUW,
    VMULOSB, VMULOSH, VMULOSW, VMULOUB, VMULOUH, VMULOUW, VMULUWM, VSUM2SWS,
    VSUM4SBS, VSUM4SHS, VSUM4UBS, VSUMSWS
  )>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
  )>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS, FNMADD, FNMSUBS, FNMSUB, FSELD, FSELS, FMULS, FMUL, XSMADDADP,
    XSMADDASP, XSMADDMDP, XSMADDMSP, XSMSUBADP, XSMSUBASP, XSMSUBMDP,
    XSMSUBMSP, XSMULDP, XSMULSP, XSNMADDADP, XSNMADDASP, XSNMADDMDP,
    XSNMADDMSP, XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP
  )>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FSEL(D|S)_rec$")
  )>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
  )>;

// One 7 cycle Restricted DP operation followed by one 3 cycle ALU operation.
// The two operations must be done sequentially. Since the DP is restricted,
// a full 4 dispatches are needed.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
  )>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
  (instrs
    XSADDDP, XSADDSP, XSCVDPHP, XSCVDPSP, XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS,
    XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS, XSCVDPSXWSs, XSCVDPUXWSs, XSCVHPDP,
    XSCVSPDP, XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP, XSRDPI, XSRDPIC,
    XSRDPIM, XSRDPIP, XSRDPIZ, XSREDP, XSRESP, XSRSQRTEDP, XSRSQRTESP,
    XSSUBDP, XSSUBSP, XSCVDPSPN, XSRSP
  )>;

// Three Cycle PM operation. There is only one PM unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD, MTVSRWS, VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD, VEXTUBLX,
    VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX, VGBBD, VMRGHB, VMRGHH,
    VMRGHW, VMRGLB, VMRGLH, VMRGLW, VPERM, VPERMR, VPERMXOR, VPKPX, VPKSDSS,
    VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS, VPKUDUM, VPKUDUS, VPKUHUM,
    VPKUHUS, VPKUWUM, VPKUWUS, VPRTYBQ, VSL, VSLDOI, VSLO, VSLV, VSR, VSRO,
    VSRV, VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW, VUPKLPX, VUPKLSB, VUPKLSH,
    VUPKLSW, XXBRD, XXBRH, XXBRQ, XXBRW, XXEXTRACTUW, XXINSERTW, XXMRGHW,
    XXMRGLW, XXPERM, XXPERMR, XXSLDWI, XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs,
    XXPERMDI, XXPERMDIs, VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM, VMUL10CUQ,
    VMUL10ECUQ, VMUL10EUQ, VMUL10UQ, VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM,
    XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP, BCDCFN_rec,
    BCDCFZ_rec, BCDCPSGN_rec, BCDCTN_rec, BCDCTZ_rec, BCDSETSGN_rec, BCDS_rec,
    BCDTRUNC_rec, BCDUS_rec, BCDUTRUNC_rec, BCDADD_rec, BCDSUB_rec
  )>;

// 12 Cycle DFU operation. There is only one DFU unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDSR_rec, XSADDQP, XSADDQPO, XSCVDPQP, XSCVQPDP, XSCVQPDPO, XSCVQPSDZ,
    XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ, XSCVSDQP, XSCVUDQP, XSRQPI, XSRQPIX,
    XSRQPXP, XSSUBQP, XSSUBQPO
  )>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDCTSQ_rec
  )>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSMADDQP, XSMADDQPO, XSMSUBQP, XSMSUBQPO, XSMULQP, XSMULQPO, XSNMADDQP,
    XSNMADDQPO, XSNMSUBQP, XSNMSUBQPO
  )>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDCFSQ_rec
  )>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSDIVQP, XSDIVQPO
  )>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSSQRTQP, XSSQRTQPO
  )>;

// 6 Cycle Load uses a single slice.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LXVL(L)?")
  )>;

// Single slice load, 5 cycles.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX, LXVB16X, LXVD2X, LXVWSX, LXSIWZX, LXV, LXVX, LXSD, DFLOADf64,
    XFLOADf64, LIWZX
  )>;

// Single slice load, 4 cycles.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT, DARN, EnforceIEIO, ISYNC, MSGSYNC, TLBSYNC, SYNC, LMW, LSWI
  )>;

// Restricted 4 cycle load. Runs on a single slice but takes the dispatch for
// the whole superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWZX,
    LFDX,
    (instregex "LFDXTLS?(_)?$"),
    LFD
  )>;

// Cracked Load Instructions.
// The load pieces can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
  (instrs
    SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL
  )>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
  )>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
  (instrs
    TEND
  )>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(B|H|W|D)CX$")
  )>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
  (instrs
    (instregex "LHA(X)?(TLS)?(8)?(_32)?(_)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(TLS)?(_32)?(_)?$"),
    TCHECK
  )>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWAX
  )>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// 2 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    LXSIWAX, LIWAX
  )>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFSX,
    (instregex "LFSXTLS?(_)?$"),
    LFS
  )>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// 2 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    LXSSP, LXSSPX, XFLOADf32, DFLOADf32
  )>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
  )>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
  (instrs
    LXVH8X, LXVDSX, LXVW4X
  )>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS|SXTLS_|DXTLS_)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX, SLBIEG, STMW, STSWI, TLBIE
  )>;

// Vector Store Instruction
// Uses the whole superslice: one dispatch plus both the Even and Odd exec
// pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
  )>;

// 5 Cycle DIV operation. There is only one DIV unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
  )>;

// 12 Cycle DIV operation. There is only one DIV unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "M(T|F)(U)?DSCR$"),
    (instregex "MTSPR(8)?$")
  )>;

// 16 Cycle DIV operation. There is only one DIV unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVW, DIVWO, DIVWU, DIVWUO, MODSW
  )>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVWE, DIVWEO, DIVD, DIVDO, DIVWEU, DIVWEUO, DIVDU, DIVDUO, MODSD, MODUD,
    MODUW
  )>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVDE, DIVDEO, DIVDEU, DIVDEUO
  )>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
  (instrs
    (instregex "DIVW(U)?(O)?_rec$")
  )>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVD_rec, DIVDO_rec, DIVDU_rec, DIVDUO_rec, DIVWE_rec, DIVWEO_rec,
    DIVWEU_rec, DIVWEUO_rec
  )>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVDE_rec, DIVDEO_rec, DIVDEU_rec, DIVDEUO_rec
  )>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    MTCRF, MTCRF8
  )>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
  )>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is three cycle ALU the other is a two cycle ALU.
// One of the ALU ops is restricted the other is not so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
  )>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
  (instrs
    MCRFS
  )>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    MTFSFIb
  )>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(8)?_rec$"),
    (instregex "SRAWI(8)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec,
    RLDIMI_rec
  )>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFFS(L|CE|_rec)?$")
  )>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFCR(8)?$")
  )>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "EXTSWSLI_32_64_rec$"),
    (instregex "SRAD(I)?_rec$"),
    EXTSWSLI_rec, SLD_rec, SRD_rec, RLDIC_rec
  )>;

// Restricted 33 Cycle DP Instruction. Uses one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FDIV)>;

// Restricted 33 Cycle DP Instruction, Cracked with a 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs FDIV_rec)>;

// 36 Cycle DP Instruction.
// A single slice is enough for this instruction.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C], (instrs XSSQRTDP)>;

// Restricted 36 Cycle DP Instruction. Uses one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FSQRT)>;

// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
  (instrs XVSQRTDP)>;

// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
  (instrs XVSQRTSP)>;

// Restricted 36 Cycle DP Instruction, Cracked with a 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs FSQRT_rec)>;

// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C], (instrs XSSQRTSP)>;

// Restricted 26 Cycle DP Instruction. Uses one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs FSQRTS)>;

// Restricted 26 Cycle DP Instruction, Cracked with a 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    FSQRTS_rec
  )>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
  (instrs
    XSDIVDP
  )>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FDIVS
  )>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    FDIVS_rec
  )>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
  (instrs
    XSDIVSP
  )>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
  (instrs
    XVDIVSP
  )>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
  (instrs
    XVDIVDP
  )>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(SU|SUX)$")
)>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted.
// Two EXEC pipelines and one AGEN pipeline are listed; presumably one EXEC
// belongs to the store and one to the ALU - TODO confirm.
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "STF(S|D)U(X)?$"),
    (instregex "ST(B|H|W|D)U(X)?(8)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load.
// Neither piece is restricted here: both dispatch entries are DISP_PAIR_1C.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. The DISP_3SLOTS_1C entry is from the restricted load while the
// DISP_1C entry is from the ALU. The AGEN pipeline is from the load and the
// EXEC pipeline is required for the ALU.
// NOTE(review): an earlier wording said the ALU accounts for two dispatches;
// the list below carries a single DISP_1C for it - confirm.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(DU|DUX)$")
)>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX
)>;

// Branch Instructions

// Two Cycle Branch
// Only the branch resource (P9_BR_2C) and the branch dispatch (DISP_BR_1C)
// are listed; no EXEC or AGEN pipeline is requested.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
      (instrs
  (instregex "BCCCTR(L)?(8)?$"),
  (instregex "BCCL(A|R|RL)?$"),
  (instregex "BCCTR(L)?(8)?(n)?$"),
  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
  (instregex "BL(_TLS|_NOP)?(_RM)?$"),
  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
  (instregex "BLA(8|8_NOP)?(_RM)?$"),
  (instregex "BLR(8|L)?$"),
  (instregex "TAILB(A)?(8)?$"),
  (instregex "TAILBCTR(8)?$"),
  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
  (instregex "BCLR(L)?(n)?$"),
  (instregex "BCTR(L)?(8)?(_RM)?$"),
  B,
  BA,
  BC,
  BCC,
  BCCA,
  BCL,
  BCLalways,
  BCLn,
  BCTRL8_LDinto_toc,
  BCTRL_LWZinto_toc,
  BCTRL8_LDinto_toc_RM,
  BCTRL_LWZinto_toc_RM,
  BCn,
  CTRL_DEP
)>;

// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
      (instrs
    ADDPCIS
)>;

// Special Extracted Instructions For Atomics

// Atomic Load
// The resource list has five LS entries (two 1-cycle, three 4-cycle), two
// EXEC pipelines, five AGEN pipelines and six dispatch entries.
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "L(D|W)AT$")
)>;

// Atomic Store
// The resource list has three LS entries (one 1-cycle, two 4-cycle), one EXEC
// pipeline, three AGEN pipelines and three dispatch entries.
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(D|W)AT$")
)>;

// Signal Processing Engine (SPE) Instructions
// These instructions are not supported on Power 9
// They are given an empty resource list and flagged with Unsupported = 1.
def : InstRW<[],
    (instrs
  BRINC,
  EVABS,
  EVEQV,
  EVMRA,
  EVNAND,
  EVNEG,
  (instregex "EVADD(I)?W$"),
  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
  (instregex "EVAND(C)?$"),
  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
  (instregex "EVCNTL(S|Z)W$"),
  (instregex "EVDIVW(S|U)$"),
  (instregex "EVEXTS(B|H)$"),
  (instregex "EVLD(H|W|D)(X)?$"),
  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
  (instregex "EVLWHE(X)?$"),
  (instregex "EVLWHO(S|U)(X)?$"),
  (instregex "EVLW(H|W)SPLAT(X)?$"),
  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
  (instregex "EVMWHUMI(A)?$"),
  (instregex "EVMWLS(M|S)IA(A|N)W$"),
  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
  (instregex "EVMWSSF(A|AA|AN)?$"),
  (instregex "EVMWUMI(A|AA|AN)?$"),
  (instregex "EV(N|X)?OR(C)?$"),
  (instregex "EVR(LW|LWI|NDW)$"),
  (instregex "EVSLW(I)?$"),
  (instregex "EVSPLAT(F)?I$"),
  (instregex "EVSRW(I)?(S|U)$"),
  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
  (instregex "EVSUB(I)?FW$")
)> { let Unsupported = 1; }

// General Instructions without scheduling support.
// Same form as the SPE list: empty resource list, flagged Unsupported = 1.
def : InstRW<[],
    (instrs
  (instregex "(H)?RFI(D)?$"),
  (instregex "DSS(ALL)?$"),
  (instregex "DST(ST)?(T)?(64)?$"),
  (instregex "ICBL(C|Q)$"),
  (instregex "L(W|H|B)EPX$"),
  (instregex "ST(W|H|B)EPX$"),
  (instregex "(L|ST)FDEPX$"),
  (instregex "M(T|F)SR(IN)?$"),
  (instregex "M(T|F)DCR$"),
  (instregex "NOP_GT_PWR(6|7)$"),
  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
  (instregex "WRTEE(I)?$"),
  (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
  ATTN,
  CLRBHRB,
  MFBHRBE,
  MBAR,
  MSYNC,
  SLBSYNC,
  SLBFEE_rec,
  NAP,
  STOP,
  TRAP,
  RFCI,
  RFDI,
  RFMCI,
  SC,
  DCBA,
  DCBI,
  DCCCI,
  ICCCI,
  ADDEX,
  ADDEX8,
  CDTBCD, CDTBCD8,
  CBCDTD, CBCDTD8,
  ADDG6S, ADDG6S8
)> { let Unsupported = 1; }