xref: /llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td (revision 674574d25cc35010dbb0b12b01e8beeaddf20a3f)
1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
24//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
33// Two cycle ALU vector operation that occupies an entire superslice.
34// Claims both ALU units (even P9_ALUE_2C and odd P9_ALUO_2C), both execution
35// pipelines (IP_EXECE_1C, IP_EXECO_1C) and one dispatch slot (DISP_1C).
36def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
37      (instrs
38    (instregex "VADDU(B|H|W|D)M$"),
39    (instregex "VAND(C)?$"),
40    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
41    (instregex "V_SET0(B|H)?$"),
42    (instregex "VS(R|L)(B|H|W|D)$"),
43    (instregex "VSUBU(B|H|W|D)M$"),
44    (instregex "VPOPCNT(B|H)$"),
45    (instregex "VRL(B|H|W|D)$"),
46    (instregex "VSRA(B|H|W|D)$"),
47    (instregex "XV(N)?ABS(D|S)P$"),
48    (instregex "XVCPSGN(D|S)P$"),
49    (instregex "XV(I|X)EXP(D|S)P$"),
50    (instregex "VRL(D|W)(MI|NM)$"),
51    (instregex "VMRG(E|O)W$"),
52    MTVSRDD,
53    VEQV,
54    VNAND,
55    VNEGD,
56    VNEGW,
57    VNOR,
58    VOR,
59    VORC,
60    VSEL,
61    VXOR,
62    XVNEGDP,
63    XVNEGSP,
64    XXLAND,
65    XXLANDC,
66    XXLEQV,
67    XXLEQVOnes,
68    XXLNAND,
69    XXLNOR,
70    XXLOR,
71    XXLORf,
72    XXLORC,
73    XXLXOR,
74    XXLXORdpz,
75    XXLXORspz,
76    XXLXORz,
77    XXSEL,
78    XSABSQP,
79    XSCPSGNQP,
80    XSIEXPQP,
81    XSNABSQP,
82    XSNEGQP,
83    XSXEXPQP
84)>;
85
86// Restricted-dispatch ALU operation for 3 cycles (P9_ALU_3C). It runs on a
87// single slice (IP_EXEC_1C) but, because it is restricted, it claims all 3
88// dispatch slots of that superslice (DISP_3SLOTS_1C).
89def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
90      (instrs
91    (instregex "TABORT(D|W)C(I)?$"),
92    (instregex "MTFSB(0|1)$"),
93    (instregex "MFFSC(D)?RN(I)?$"),
94    (instregex "CMPRB(8)?$"),
95    (instregex "TD(I)?$"),
96    (instregex "TW(I)?$"),
97    (instregex "FCMP(O|U)(S|D)$"),
98    (instregex "XSTSTDC(S|D)P$"),
99    FTDIV,
100    FTSQRT,
101    CMPEQB
102)>;
103
104// Standard-dispatch 3 cycle ALU operation: one slice, one dispatch (DISP_1C).
105def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
106      (instrs
107    (instregex "XSMAX(C|J)?DP$"),
108    (instregex "XSMIN(C|J)?DP$"),
109    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
110    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
111    (instregex "POPCNT(D|W)$"),
112    (instregex "CMPB(8)?$"),
113    (instregex "SETB(8)?$"),
114    XSTDIVDP,
115    XSTSQRTDP,
116    XSXSIGDP,
117    XSCVSPDPN,
118    BPERMD
119)>;
120
121// Standard-dispatch 2 cycle ALU operation: one slice, one dispatch (DISP_1C).
// NOTE(review): "NEG(8)?(O)?$" below is already covered by
// "NEG(8)?(O)?(_rec)?$", and "POPCNTB$" / "POPCNTB8$" could be a single
// pattern. The duplicates look harmless but are redundant - confirm.
122def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
123      (instrs
124    (instregex "S(L|R)D$"),
125    (instregex "SRAD(I)?$"),
126    (instregex "EXTSWSLI_32_64$"),
127    (instregex "MFV(S)?RD$"),
128    (instregex "MTV(S)?RD$"),
129    (instregex "MTV(S)?RW(A|Z)$"),
130    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
131    (instregex "CMP(L)?D(I)?$"),
132    (instregex "SUBF(I)?C(8)?(O)?$"),
133    (instregex "ANDI(S)?(8)?(_rec)?$"),
134    (instregex "ADDC(8)?(O)?$"),
135    (instregex "ADDIC(8)?(_rec)?$"),
136    (instregex "ADD(8|4)(O)?(_rec)?$"),
137    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
138    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
139    (instregex "NEG(8)?(O)?(_rec)?$"),
140    (instregex "POPCNTB$"),
141    (instregex "POPCNTB8$"),
142    (instregex "ADD(I|IS)?(8)?$"),
143    (instregex "LI(S)?(8)?$"),
144    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
145    (instregex "NAND(8)?(_rec)?$"),
146    (instregex "AND(C)?(8)?(_rec)?$"),
147    (instregex "NOR(8)?(_rec)?$"),
148    (instregex "OR(C)?(8)?(_rec)?$"),
149    (instregex "EQV(8)?(_rec)?$"),
150    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
151    (instregex "ADD(4|8)(TLS)?(_)?$"),
152    (instregex "NEG(8)?(O)?$"),
153    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
154    (instregex "LA(8)?$"),
155    COPY,
156    MCRF,
157    MCRXRX,
158    XSNABSDP,
159    XSNABSDPs,
160    XSXEXPDP,
161    XSABSDP,
162    XSNEGDP,
163    XSCPSGNDP,
164    MFVSRWZ,
165    MFVRWZ,
166    EXTSWSLI,
167    SRADI_32,
168    RLDIC,
169    RFEBB,
170    TBEGIN,
171    TRECHKPT,
172    NOP,
173    WAIT
174)>;
175
176// Restricted-dispatch ALU operation for 2 cycles (P9_ALU_2C). It runs on a
177// single slice (IP_EXEC_1C) but, because it is restricted, it claims all 3
178// dispatch slots of that superslice (DISP_3SLOTS_1C).
179def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
180      (instrs
181    (instregex "RLDC(L|R)$"),
182    (instregex "RLWIMI(8)?$"),
183    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
184    (instregex "M(F|T)OCRF(8)?$"),
185    (instregex "CR(6)?(UN)?SET$"),
186    (instregex "CR(N)?(OR|AND)(C)?$"),
187    (instregex "S(L|R)W(8)?$"),
188    (instregex "RLW(INM|NM)(8)?$"),
189    (instregex "F(N)?ABS(D|S)$"),
190    (instregex "FNEG(D|S)$"),
191    (instregex "FCPSGN(D|S)$"),
192    (instregex "SRAW(8)?$"),
193    (instregex "SRAWI(8)?$"),
194    (instregex "ISEL(8)?$"),
195    RLDIMI,
196    XSIEXPDP,
197    FMR,
198    CREQV,
199    CRNOT,
200    CRXOR,
201    TRECLAIM,
202    TSR,
203    TABORT
204)>;
205
206// Three cycle ALU vector operation that occupies an entire superslice.
207// Claims both ALU units (even P9_ALUE_3C and odd P9_ALUO_3C), both execution
208// pipelines (IP_EXECE_1C, IP_EXECO_1C) and one dispatch slot (DISP_1C).
209def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
210      (instrs
211    (instregex "M(T|F)VSCR$"),
212    (instregex "VCMPNEZ(B|H|W)$"),
213    (instregex "VCMPEQU(B|H|W|D)$"),
214    (instregex "VCMPNE(B|H|W)$"),
215    (instregex "VABSDU(B|H|W)$"),
216    (instregex "VADDU(B|H|W)S$"),
217    (instregex "VAVG(S|U)(B|H|W)$"),
218    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
219    (instregex "VCMPBFP(_rec)?$"),
220    (instregex "VC(L|T)Z(B|H|W|D)$"),
221    (instregex "VADDS(B|H|W)S$"),
222    (instregex "V(MIN|MAX)FP$"),
223    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
224    VBPERMD,
225    VADDCUW,
226    VPOPCNTW,
227    VPOPCNTD,
228    VPRTYBD,
229    VPRTYBW,
230    VSHASIGMAD,
231    VSHASIGMAW,
232    VSUBSBS,
233    VSUBSHS,
234    VSUBSWS,
235    VSUBUBS,
236    VSUBUHS,
237    VSUBUWS,
238    VSUBCUW,
239    VCMPGTSB,
240    VCMPGTSB_rec,
241    VCMPGTSD,
242    VCMPGTSD_rec,
243    VCMPGTSH,
244    VCMPGTSH_rec,
245    VCMPGTSW,
246    VCMPGTSW_rec,
247    VCMPGTUB,
248    VCMPGTUB_rec,
249    VCMPGTUD,
250    VCMPGTUD_rec,
251    VCMPGTUH,
252    VCMPGTUH_rec,
253    VCMPGTUW,
254    VCMPGTUW_rec,
255    VCMPNEB_rec,
256    VCMPNEH_rec,
257    VCMPNEW_rec,
258    VCMPNEZB_rec,
259    VCMPNEZH_rec,
260    VCMPNEZW_rec,
261    VCMPEQUB_rec,
262    VCMPEQUD_rec,
263    VCMPEQUH_rec,
264    VCMPEQUW_rec,
265    XVCMPEQDP,
266    XVCMPEQDP_rec,
267    XVCMPEQSP,
268    XVCMPEQSP_rec,
269    XVCMPGEDP,
270    XVCMPGEDP_rec,
271    XVCMPGESP,
272    XVCMPGESP_rec,
273    XVCMPGTDP,
274    XVCMPGTDP_rec,
275    XVCMPGTSP,
276    XVCMPGTSP_rec,
277    XVMAXDP,
278    XVMAXSP,
279    XVMINDP,
280    XVMINSP,
281    XVTDIVDP,
282    XVTDIVSP,
283    XVTSQRTDP,
284    XVTSQRTSP,
285    XVTSTDCDP,
286    XVTSTDCSP,
287    XVXSIGDP,
288    XVXSIGSP
289)>;
290
291// 7 cycle DP vector operation that uses an entire superslice.
292// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
293// EXECO) and 1 dispatch (DISP_1C) to the given superslice.
294def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
295      (instrs
296    VADDFP,
297    VCTSXS,
298    VCTSXS_0,
299    VCTUXS,
300    VCTUXS_0,
301    VEXPTEFP,
302    VLOGEFP,
303    VMADDFP,
304    VMHADDSHS,
305    VNMSUBFP,
306    VREFP,
307    VRFIM,
308    VRFIN,
309    VRFIP,
310    VRFIZ,
311    VRSQRTEFP,
312    VSUBFP,
313    XVADDDP,
314    XVADDSP,
315    XVCVDPSP,
316    XVCVDPSXDS,
317    XVCVDPSXWS,
318    XVCVDPUXDS,
319    XVCVDPUXWS,
320    XVCVHPSP,
321    XVCVSPDP,
322    XVCVSPHP,
323    XVCVSPSXDS,
324    XVCVSPSXWS,
325    XVCVSPUXDS,
326    XVCVSPUXWS,
327    XVCVSXDDP,
328    XVCVSXDSP,
329    XVCVSXWDP,
330    XVCVSXWSP,
331    XVCVUXDDP,
332    XVCVUXDSP,
333    XVCVUXWDP,
334    XVCVUXWSP,
335    XVMADDADP,
336    XVMADDASP,
337    XVMADDMDP,
338    XVMADDMSP,
339    XVMSUBADP,
340    XVMSUBASP,
341    XVMSUBMDP,
342    XVMSUBMSP,
343    XVMULDP,
344    XVMULSP,
345    XVNMADDADP,
346    XVNMADDASP,
347    XVNMADDMDP,
348    XVNMADDMSP,
349    XVNMSUBADP,
350    XVNMSUBASP,
351    XVNMSUBMDP,
352    XVNMSUBMSP,
353    XVRDPI,
354    XVRDPIC,
355    XVRDPIM,
356    XVRDPIP,
357    XVRDPIZ,
358    XVREDP,
359    XVRESP,
360    XVRSPI,
361    XVRSPIC,
362    XVRSPIM,
363    XVRSPIP,
364    XVRSPIZ,
365    XVRSQRTEDP,
366    XVRSQRTESP,
367    XVSUBDP,
368    XVSUBSP,
369    VCFSX,
370    VCFSX_0,
371    VCFUX,
372    VCFUX_0,
373    VMHRADDSHS,
374    VMLADDUHM,
375    VMSUMMBM,
376    VMSUMSHM,
377    VMSUMSHS,
378    VMSUMUBM,
379    VMSUMUHM,
380    VMSUMUDM,
381    VMSUMUHS,
382    VMULESB,
383    VMULESH,
384    VMULESW,
385    VMULEUB,
386    VMULEUH,
387    VMULEUW,
388    VMULOSB,
389    VMULOSH,
390    VMULOSW,
391    VMULOUB,
392    VMULOUH,
393    VMULOUW,
394    VMULUWM,
395    VSUM2SWS,
396    VSUM4SBS,
397    VSUM4SHS,
398    VSUM4UBS,
399    VSUMSWS
400)>;
401
402// 5 cycle Restricted DP operation. Uses one DP unit (P9_DP_5C), one EXEC
403// pipeline and all three dispatch slots of the superslice (DISP_3SLOTS_1C).
404def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
405      (instrs
406    (instregex "MADD(HD|HDU|LD|LD8)$"),
407    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
408)>;
409
410// 7 cycle Restricted DP operation. Uses one DP unit (P9_DP_7C), one EXEC
411// pipeline and all three dispatch slots of the superslice (DISP_3SLOTS_1C).
412def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
413      (instrs
414    FRSP,
415    (instregex "FRI(N|P|Z|M)(D|S)$"),
416    (instregex "FRE(S)?$"),
417    (instregex "FADD(S)?$"),
418    (instregex "FMSUB(S)?$"),
419    (instregex "FMADD(S)?$"),
420    (instregex "FSUB(S)?$"),
421    (instregex "FCFID(U)?(S)?$"),
422    (instregex "FCTID(U)?(Z)?$"),
423    (instregex "FCTIW(U)?(Z)?$"),
424    (instregex "FRSQRTE(S)?$"),
425    FNMADDS,
426    FNMADD,
427    FNMSUBS,
428    FNMSUB,
429    FSELD,
430    FSELS,
431    FMULS,
432    FMUL,
433    XSMADDADP,
434    XSMADDASP,
435    XSMADDMDP,
436    XSMADDMSP,
437    XSMSUBADP,
438    XSMSUBASP,
439    XSMSUBMDP,
440    XSMSUBMSP,
441    XSMULDP,
442    XSMULSP,
443    XSNMADDADP,
444    XSNMADDASP,
445    XSNMADDMDP,
446    XSNMADDMSP,
447    XSNMSUBADP,
448    XSNMSUBASP,
449    XSNMSUBMDP,
450    XSNMSUBMSP
451)>;
452
453// 7 cycle Restricted DP operation plus one 3 cycle ALU operation.
454// The two operations can run in parallel. The DP part is restricted, so a
455// full 4 dispatches are needed (DISP_3SLOTS_1C plus DISP_1C).
456def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
457              DISP_3SLOTS_1C, DISP_1C],
458      (instrs
459    (instregex "FSEL(D|S)_rec$")
460)>;
461
462// 5 cycle Restricted DP operation and one 2 cycle ALU operation (7C total).
463def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
464              DISP_3SLOTS_1C, DISP_1C],
465      (instrs
466    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
467)>;
468
469// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
470// These operations must be done sequentially. The DP is restricted so we need
471// a full 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
472def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
473              DISP_3SLOTS_1C, DISP_1C],
474      (instrs
475    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
476    (instregex "FRE(S)?_rec$"),
477    (instregex "FADD(S)?_rec$"),
478    (instregex "FSUB(S)?_rec$"),
479    (instregex "F(N)?MSUB(S)?_rec$"),
480    (instregex "F(N)?MADD(S)?_rec$"),
481    (instregex "FCFID(U)?(S)?_rec$"),
482    (instregex "FCTID(U)?(Z)?_rec$"),
483    (instregex "FCTIW(U)?(Z)?_rec$"),
484    (instregex "FMUL(S)?_rec$"),
485    (instregex "FRSQRTE(S)?_rec$"),
486    FRSP_rec
487)>;
488
489// 7 cycle DP operation. One DP unit, one EXEC pipeline and one dispatch unit.
490def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
491      (instrs
492    XSADDDP,
493    XSADDSP,
494    XSCVDPHP,
495    XSCVDPSP,
496    XSCVDPSXDS,
497    XSCVDPSXDSs,
498    XSCVDPSXWS,
499    XSCVDPUXDS,
500    XSCVDPUXDSs,
501    XSCVDPUXWS,
502    XSCVDPSXWSs,
503    XSCVDPUXWSs,
504    XSCVHPDP,
505    XSCVSPDP,
506    XSCVSXDDP,
507    XSCVSXDSP,
508    XSCVUXDDP,
509    XSCVUXDSP,
510    XSRDPI,
511    XSRDPIC,
512    XSRDPIM,
513    XSRDPIP,
514    XSRDPIZ,
515    XSREDP,
516    XSRESP,
517    XSRSQRTEDP,
518    XSRSQRTESP,
519    XSSUBDP,
520    XSSUBSP,
521    XSCVDPSPN,
522    XSRSP
523)>;
524
525// Three cycle PM operation. There is only one PM unit per superslice, so the
526// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
527// dispatch (DISP_1C).
528def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
529      (instrs
530    (instregex "LVS(L|R)$"),
531    (instregex "VSPLTIS(W|H|B)$"),
532    (instregex "VSPLT(W|H|B)(s)?$"),
533    (instregex "V_SETALLONES(B|H)?$"),
534    (instregex "VEXTRACTU(B|H|W)$"),
535    (instregex "VINSERT(B|H|W|D)$"),
536    MFVSRLD,
537    MTVSRWS,
538    VBPERMQ,
539    VCLZLSBB,
540    VCTZLSBB,
541    VEXTRACTD,
542    VEXTUBLX,
543    VEXTUBRX,
544    VEXTUHLX,
545    VEXTUHRX,
546    VEXTUWLX,
547    VEXTUWRX,
548    VGBBD,
549    VMRGHB,
550    VMRGHH,
551    VMRGHW,
552    VMRGLB,
553    VMRGLH,
554    VMRGLW,
555    VPERM,
556    VPERMR,
557    VPERMXOR,
558    VPKPX,
559    VPKSDSS,
560    VPKSDUS,
561    VPKSHSS,
562    VPKSHUS,
563    VPKSWSS,
564    VPKSWUS,
565    VPKUDUM,
566    VPKUDUS,
567    VPKUHUM,
568    VPKUHUS,
569    VPKUWUM,
570    VPKUWUS,
571    VPRTYBQ,
572    VSL,
573    VSLDOI,
574    VSLO,
575    VSLV,
576    VSR,
577    VSRO,
578    VSRV,
579    VUPKHPX,
580    VUPKHSB,
581    VUPKHSH,
582    VUPKHSW,
583    VUPKLPX,
584    VUPKLSB,
585    VUPKLSH,
586    VUPKLSW,
587    XXBRD,
588    XXBRH,
589    XXBRQ,
590    XXBRW,
591    XXEXTRACTUW,
592    XXINSERTW,
593    XXMRGHW,
594    XXMRGLW,
595    XXPERM,
596    XXPERMR,
597    XXSLDWI,
598    XXSLDWIs,
599    XXSPLTIB,
600    XXSPLTW,
601    XXSPLTWs,
602    XXPERMDI,
603    XXPERMDIs,
604    VADDCUQ,
605    VADDECUQ,
606    VADDEUQM,
607    VADDUQM,
608    VMUL10CUQ,
609    VMUL10ECUQ,
610    VMUL10EUQ,
611    VMUL10UQ,
612    VSUBCUQ,
613    VSUBECUQ,
614    VSUBEUQM,
615    VSUBUQM,
616    XSCMPEXPQP,
617    XSCMPOQP,
618    XSCMPUQP,
619    XSTSTDCQP,
620    XSXSIGQP,
621    BCDCFN_rec,
622    BCDCFZ_rec,
623    BCDCPSGN_rec,
624    BCDCTN_rec,
625    BCDCTZ_rec,
626    BCDSETSGN_rec,
627    BCDS_rec,
628    BCDTRUNC_rec,
629    BCDUS_rec,
630    BCDUTRUNC_rec,
631    BCDADD_rec,
632    BCDSUB_rec
633)>;
634
635// 12 cycle DFU operation. There is only one DFU unit per CPU, so a whole
636// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch
637// (DISP_1C).
638def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
639      (instrs
640    BCDSR_rec,
641    XSADDQP,
642    XSADDQPO,
643    XSCVDPQP,
644    XSCVQPDP,
645    XSCVQPDPO,
646    XSCVQPSDZ,
647    XSCVQPSWZ,
648    XSCVQPUDZ,
649    XSCVQPUWZ,
650    XSCVSDQP,
651    XSCVUDQP,
652    XSRQPI,
653    XSRQPIX,
654    XSRQPXP,
655    XSSUBQP,
656    XSSUBQPO
657)>;
658
659// 23 cycle DFU operation. There is only one DFU unit per CPU, so a whole
660// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch
661// (DISP_1C).
662def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
663      (instrs
664    BCDCTSQ_rec
665)>;
666
667// 24 cycle DFU operation. There is only one DFU unit per CPU, so a whole
668// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch
669// (DISP_1C).
670def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
671      (instrs
672    XSMADDQP,
673    XSMADDQPO,
674    XSMSUBQP,
675    XSMSUBQPO,
676    XSMULQP,
677    XSMULQPO,
678    XSNMADDQP,
679    XSNMADDQPO,
680    XSNMSUBQP,
681    XSNMSUBQPO
682)>;
683
684// 37 cycle DFU operation. There is only one DFU unit per CPU, so a whole
685// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch
686// (DISP_1C).
687def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
688      (instrs
689    BCDCFSQ_rec
690)>;
691
692// 58 cycle DFU operation. There is only one DFU unit per CPU, so a whole
693// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch
694// (DISP_1C).
695def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
696      (instrs
697    XSDIVQP,
698    XSDIVQPO
699)>;
700
701// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
702// superslice. That includes both exec pipelines (EXECO, EXECE) and one
703// dispatch (DISP_1C).
704def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
705      (instrs
706    XSSQRTQP,
707    XSSQRTQPO
708)>;
709
710// 6 cycle load (P9_LS_6C) that uses a single slice and one dispatch.
// NOTE(review): unlike most patterns visible in this file, the regex below has
// no trailing `$` - confirm whether matching longer names is intended.
711def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
712      (instrs
713    (instregex "LXVL(L)?")
714)>;
715
716// 5 cycle load (P9_LS_5C) that uses a single slice and one dispatch.
// NOTE(review): "LVX(L)?" below has no trailing `$`, unlike its neighbours -
// confirm whether matching longer names is intended.
717def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
718      (instrs
719    (instregex "LVE(B|H|W)X$"),
720    (instregex "LVX(L)?"),
721    (instregex "LXSI(B|H)ZX$"),
722    LXSDX,
723    LXVB16X,
724    LXVD2X,
725    LXVWSX,
726    LXSIWZX,
727    LXV,
728    LXVX,
729    LXSD,
730    DFLOADf64,
731    XFLOADf64,
732    LIWZX
733)>;
734
735// 4 cycle load (P9_LS_4C) that uses a single slice and one dispatch.
736def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
737      (instrs
738    (instregex "DCB(F|T|ST)(EP)?$"),
739    (instregex "DCBZ(L)?(EP)?$"),
740    (instregex "DCBTST(EP)?$"),
741    (instregex "CP_COPY(8)?$"),
742    (instregex "ICBI(EP)?$"),
743    (instregex "ICBT(LS)?$"),
744    (instregex "LBARX(L)?$"),
745    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
746    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
747    (instregex "LH(A|B)RX(L)?(8)?$"),
748    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
749    (instregex "LWARX(L)?$"),
750    (instregex "LWBRX(8)?$"),
751    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
752    CP_ABORT,
753    DARN,
754    EnforceIEIO,
755    ISYNC,
756    MSGSYNC,
757    TLBSYNC,
758    SYNC,
759    LMW,
760    LSWI
761)>;
762
763// 4 cycle Restricted load. Uses a single slice but takes the dispatch for the
764// whole superslice (DISP_3SLOTS_1C).
// NOTE(review): in "LFDXTLS?(_)?$" the `?` applies only to the final `S`, so
// the pattern also accepts "LFDXTL"; presumably "LFDXTLS(_)?$" was meant -
// confirm.
765def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
766      (instrs
767    LFIWZX,
768    LFDX,
769    (instregex "LFDXTLS?(_)?$"),
770    LFD
771)>;
772
773// Cracked Load Instructions.
774// Two 4 cycle load pieces (P9_LS_4C) that can be done in parallel.
775def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
776              DISP_PAIR_1C],
777      (instrs
778    SLBIA,
779    SLBIE,
780    SLBMFEE,
781    SLBMFEV,
782    SLBMTE,
783    TLBIEL
784)>;
785
786// Cracked Load Instruction.
787// Requires a 4 cycle Load piece and a 2 cycle ALU piece (6 cycles in total).
788// The Load and ALU operations can be run in parallel.
789def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
790              DISP_PAIR_1C, DISP_PAIR_1C],
791      (instrs
792    (instregex "L(W|H)ZU(X)?(8)?$")
793)>;
794
795// Cracked TEND Instruction.
796// Requires a 4 cycle Load piece and a 2 cycle ALU piece (6 cycles in total).
797// The Load and ALU operations can be run in parallel.
798def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
799              DISP_1C, DISP_1C],
800      (instrs
801    TEND
802)>;
803
804
805// Cracked Store Instruction.
806// Consecutive Store and ALU operations. The store is restricted and requires
807// three dispatches (DISP_3SLOTS_1C); one more DISP_1C is also listed.
808def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
809              DISP_3SLOTS_1C, DISP_1C],
810      (instrs
811    (instregex "ST(B|H|W|D)CX$")
812)>;
813
814// Cracked Load instruction (P9_LoadAndALUOp_6C).
815// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
816// operations cannot be done at the same time and so their latencies are added.
817def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
818              DISP_1C, DISP_1C],
819      (instrs
820    (instregex "LHA(X)?(TLS)?(8)?(_32)?(_)?$"),
821    (instregex "CP_PASTE(8)?_rec$"),
822    (instregex "LWA(X)?(TLS)?(_32)?(_)?$"),
823    TCHECK
824)>;
825
826// Cracked Restricted Load instruction (P9_LoadAndALUOp_6C).
827// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
828// operations cannot be done at the same time and so their latencies are added.
829// Full 6 dispatches are required as this is both cracked and restricted.
830def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
831              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
832      (instrs
833    LFIWAX
834)>;
835
836// Cracked Load instruction.
837// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
838// operations cannot be done at the same time and so their latencies are added.
839// Two dispatches (DISP_1C, DISP_1C) are used as this is a cracked instruction.
840def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
841      (instrs
842    LXSIWAX,
843    LIWAX
844)>;
845
846// Cracked Load instruction.
847// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
848// cycles. The Load and ALU operations cannot be done at the same time and so
849// their latencies are added.
850// Full 6 dispatches are required as this is a restricted instruction.
// NOTE(review): in "LFSXTLS?(_)?$" the `?` applies only to the final `S`, so
// the pattern also accepts "LFSXTL"; presumably "LFSXTLS(_)?$" was meant -
// confirm.
851def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
852              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
853      (instrs
854    LFSX,
855    (instregex "LFSXTLS?(_)?$"),
856    LFS
857)>;
858
859// Cracked Load instruction.
860// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
861// operations cannot be done at the same time and so their latencies are added.
862// Two dispatches (DISP_1C, DISP_1C) are used as this is a cracked instruction.
863def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
864      (instrs
865    LXSSP,
866    LXSSPX,
867    XFLOADf32,
868    DFLOADf32
869)>;
870
871// Cracked 3-Way Load Instruction.
872// Load with two ALU operations that depend on each other.
873def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
874              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
875      (instrs
876    (instregex "LHAU(X)?(8)?$"),
877    LWAUX
878)>;
879
880// Cracked Load that requires the PM resource (P9_LoadAndPMOp_8C).
881// The Load and the PM operation cannot be done at the same time, so their
882// latencies are added for a total of 8 cycles. The PM needs the full
883// superslice, i.e. both the EXECE and EXECO pipelines plus 1 dispatch; the
884// Load requires the remaining 1 dispatch.
885def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
886              DISP_1C, DISP_1C],
887      (instrs
888    LXVH8X,
889    LXVDSX,
890    LXVW4X
891)>;
892
893// Single slice Restricted store operation. Being restricted, it requires all
894// three dispatches for the superslice (DISP_3SLOTS_1C).
895def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
896      (instrs
897    (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS|SXTLS_|DXTLS_)$"),
898    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
899    (instregex "STW(8)?$"),
900    (instregex "(D|X)FSTORE(f32|f64)$"),
901    (instregex "ST(W|H|D)BRX$"),
902    (instregex "ST(B|H|D)(8)?$"),
903    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
904    STIWX,
905    SLBIEG,
906    STMW,
907    STSWI,
908    TLBIE
909)>;
910
911// Vector Store Instruction.
912// Requires the whole superslice: one dispatch (DISP_1C) as well as both the
913// Even and Odd exec pipelines.
914def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
915      (instrs
916    (instregex "STVE(B|H|W)X$"),
917    (instregex "STVX(L)?$"),
918    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
919)>;
920
921// 5 cycle DIV operation. There is only one DIV unit per superslice, so the
922// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
923// dispatches (DISP_EVEN_1C).
924def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
925      (instrs
926    (instregex "MTCTR(8)?(loop)?$"),
927    (instregex "MTLR(8)?$")
928)>;
929
930// 12 cycle DIV operation. There is only one DIV unit per superslice, so the
931// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
932// dispatches (DISP_EVEN_1C).
933def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
934      (instrs
935    (instregex "M(T|F)VRSAVE(v)?$"),
936    (instregex "M(T|F)PMR$"),
937    (instregex "M(T|F)TB(8)?$"),
938    (instregex "MF(SPR|CTR|LR)(8)?$"),
939    (instregex "M(T|F)MSR(D)?$"),
940    (instregex "M(T|F)(U)?DSCR$"),
941    (instregex "MTSPR(8)?$")
942)>;
943
944// 16 cycle DIV operation. There is only one DIV unit per superslice, so the
945// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
946// dispatches (DISP_EVEN_1C).
947def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
948      (instrs
949    DIVW,
950    DIVWO,
951    DIVWU,
952    DIVWUO,
953    MODSW
954)>;
955
956// 24 cycle DIV operation. There is only one DIV unit per superslice, so the
957// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
958// dispatches (DISP_EVEN_1C).
959def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
960      (instrs
961    DIVWE,
962    DIVWEO,
963    DIVD,
964    DIVDO,
965    DIVWEU,
966    DIVWEUO,
967    DIVDU,
968    DIVDUO,
969    MODSD,
970    MODUD,
971    MODUW
972)>;
973
974// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
975// superslice. That includes both exec pipelines (EXECO, EXECE) and two
976// dispatches (DISP_EVEN_1C).
977def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
978      (instrs
979    DIVDE,
980    DIVDEO,
981    DIVDEU,
982    DIVDEUO
983)>;
984
985// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
986// and one full superslice for the DIV operation since there is only one DIV per
987// superslice. Latency of DIV plus ALU is 18.
988def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
989              DISP_EVEN_1C, DISP_1C],
990      (instrs
991    (instregex "DIVW(U)?(O)?_rec$")
992)>;
993
994// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
995// and one full superslice for the DIV operation since there is only one DIV per
996// superslice. Latency of DIV plus ALU is 26 (P9_IntDivAndALUOp_26C_8).
997def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
998              DISP_EVEN_1C, DISP_1C],
999      (instrs
1000    DIVD_rec,
1001    DIVDO_rec,
1002    DIVDU_rec,
1003    DIVDUO_rec,
1004    DIVWE_rec,
1005    DIVWEO_rec,
1006    DIVWEU_rec,
1007    DIVWEUO_rec
1008)>;
1009
1010// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
1011// and one full superslice for the DIV operation since there is only one DIV per
1012// superslice. Latency of DIV plus ALU is 42 (P9_IntDivAndALUOp_42C_8).
1013def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
1014              DISP_EVEN_1C, DISP_1C],
1015      (instrs
1016    DIVDE_rec,
1017    DIVDEO_rec,
1018    DIVDEU_rec,
1019    DIVDEUO_rec
1020)>;
1021
1022// CR access instructions in _BrMCR, IIC_BrMCRX.
1023
1024// Cracked, restricted ALU operations.
1025// The two ALU ops can be done in parallel, so their latencies are not
1026// added together. Otherwise this is like having two instructions running
1027// together on two pipelines and 6 dispatches (2 x DISP_3SLOTS_1C). ALU ops are
1028// 2 cycles each.
1029def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1030              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1031      (instrs
1032    MTCRF,
1033    MTCRF8
1034)>;
1035
1036// Cracked ALU operations.
1037// The two ALU ops can be done in parallel, so their latencies are not
1038// added together. Otherwise this is like having two instructions running
1039// together on two pipelines and 2 dispatches (2 x DISP_1C). ALU ops are
1040// 2 cycles each.
1041def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1042              DISP_1C, DISP_1C],
1043      (instrs
1044    (instregex "ADDC(8)?(O)?_rec$"),
1045    (instregex "SUBFC(8)?(O)?_rec$")
1046)>;
1047
1048// Cracked ALU operations.
1049// Two ALU ops can be done in parallel.
1050// One is a three cycle ALU op, the other is a two cycle ALU op.
1051// One of the ALU ops is restricted and the other is not, so we have a total of
1052// 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
1053def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1054              DISP_3SLOTS_1C, DISP_1C],
1055      (instrs
1056    (instregex "F(N)?ABS(D|S)_rec$"),
1057    (instregex "FCPSGN(D|S)_rec$"),
1058    (instregex "FNEG(D|S)_rec$"),
1059    FMR_rec
1060)>;
1061
1062// Cracked ALU operations.
1063// The two ALU ops can be done in parallel, so their latencies are not
1064// added together. Otherwise this is like having two instructions running
1065// together on two pipelines and 2 dispatches (2 x DISP_1C).
1066// ALU ops are 3 cycles each.
1067def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1068              DISP_1C, DISP_1C],
1069      (instrs
1070    MCRFS
1071)>;
1072
1073// Cracked Restricted ALU operations.
1074// The two ALU ops can be done in parallel, so their latencies are not
1075// added together. Otherwise this is like having two instructions running
1076// together on two pipelines and 6 dispatches (2 x DISP_3SLOTS_1C).
1077// ALU ops are 3 cycles each.
1078def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1079              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1080      (instrs
1081    (instregex "MTFSF(b|_rec)?$"),
1082    (instregex "MTFSFI(_rec)?$"),
1083    MTFSFIb
1084)>;
1085
1086// Cracked instruction made of two ALU ops (P9_ALUOpAndALUOp_4C).
1087// The two ops cannot be done in parallel.
1088// One of the ALU ops is restricted and takes 3 dispatches (DISP_3SLOTS_1C).
1089def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
1090              DISP_3SLOTS_1C, DISP_1C],
1091      (instrs
1092    (instregex "RLD(I)?C(R|L)_rec$"),
1093    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
1094    (instregex "SLW(8)?_rec$"),
1095    (instregex "SRAW(8)?_rec$"),
1096    (instregex "SRAWI(8)?_rec$"),
1097    (instregex "SRW(8)?_rec$"),
1098    RLDICL_32_rec,
1099    RLDIMI_rec
1100)>;
1101
1102// Cracked instruction made of two ALU ops.
1103// The two ops cannot be done in parallel.
1104// Both of the ALU ops are restricted and take 3 dispatches.
1105def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
1106              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1107      (instrs
1108    (instregex "MFFS(L|CE|_rec)?$")
1109)>;
1110
1111// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
1112// total of 6 cycles. All of the ALU operations are also restricted so each
1113// takes 3 dispatches for a total of 9.
1114def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
1115              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1116      (instrs
1117    (instregex "MFCR(8)?$")
1118)>;
1119
1120// Cracked instruction made of two ALU ops.
1121// The two ops cannot be done in parallel.
1122def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
1123      (instrs
1124    (instregex "EXTSWSLI_32_64_rec$"),
1125    (instregex "SRAD(I)?_rec$"),
1126    EXTSWSLI_rec,
1127    SLD_rec,
1128    SRD_rec,
1129    RLDIC_rec
1130)>;
1131
1132// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1133def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
1134      (instrs
1135    FDIV
1136)>;
1137
1138// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1139def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
1140              DISP_3SLOTS_1C, DISP_1C],
1141      (instrs
1142    FDIV_rec
1143)>;
1144
1145// 36 Cycle DP Instruction.
1146// Instruction can be done on a single slice.
1147def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
1148      (instrs
1149    XSSQRTDP
1150)>;
1151
1152// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1153def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
1154      (instrs
1155    FSQRT
1156)>;
1157
1158// 36 Cycle DP Vector Instruction.
1159def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
1160              DISP_1C],
1161      (instrs
1162    XVSQRTDP
1163)>;
1164
1165// 27 Cycle DP Vector Instruction.
1166def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
1167              DISP_1C],
1168      (instrs
1169    XVSQRTSP
1170)>;
1171
1172// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1173def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
1174              DISP_3SLOTS_1C, DISP_1C],
1175      (instrs
1176    FSQRT_rec
1177)>;
1178
1179// 26 Cycle DP Instruction.
1180def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
1181      (instrs
1182    XSSQRTSP
1183)>;
1184
1185// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1186def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1187      (instrs
1188    FSQRTS
1189)>;
1190
1191// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1192def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
1193              DISP_3SLOTS_1C, DISP_1C],
1194      (instrs
1195    FSQRTS_rec
1196)>;
1197
1198// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
1199def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
1200      (instrs
1201    XSDIVDP
1202)>;
1203
1204// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1205def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1206      (instrs
1207    FDIVS
1208)>;
1209
1210// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1211def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
1212              DISP_3SLOTS_1C, DISP_1C],
1213      (instrs
1214    FDIVS_rec
1215)>;
1216
1217// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
1218def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
1219      (instrs
1220    XSDIVSP
1221)>;
1222
1223// 24 Cycle DP Vector Instruction. Takes one full superslice.
1224// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1225// superslice.
1226def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
1227              DISP_1C],
1228      (instrs
1229    XVDIVSP
1230)>;
1231
1232// 33 Cycle DP Vector Instruction. Takes one full superslice.
1233// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1234// superslice.
1235def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
1236              DISP_1C],
1237      (instrs
1238    XVDIVDP
1239)>;
1240
1241// Instruction cracked into three pieces. One Load and two ALU operations.
1242// The Load and one of the ALU ops cannot be run at the same time and so the
1243// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
1244// Both the load and the ALU that depends on it are restricted and so they take
1245// a total of 6 dispatches. The final dispatch comes from the second ALU op.
1246// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
1247def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
1248              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1249              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
1250      (instrs
1251    (instregex "LF(SU|SUX)$")
1252)>;
1253
1254// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1255// the store and so it can be run at the same time as the store. The store is
1256// also restricted.
1257def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1258              DISP_3SLOTS_1C, DISP_1C],
1259      (instrs
1260    (instregex "STF(S|D)U(X)?$"),
1261    (instregex "ST(B|H|W|D)U(X)?(8)?$")
1262)>;
1263
1264// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1265// the load and so it can be run at the same time as the load.
1266def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1267              DISP_PAIR_1C, DISP_PAIR_1C],
1268      (instrs
1269    (instregex "LBZU(X)?(8)?$"),
1270    (instregex "LDU(X)?$")
1271)>;
1272
1273// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1274// the load and so it can be run at the same time as the load. The load is also
1275// restricted. 3 dispatches are from the restricted load while the other one
1276// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
1277// is required for the ALU.
1278def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1279              DISP_3SLOTS_1C, DISP_1C],
1280      (instrs
1281    (instregex "LF(DU|DUX)$")
1282)>;
1283
1284// Crypto Instructions
1285
1286// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
1287// superslice. That includes both exec pipelines (EXECO, EXECE) and one
1288// dispatch.
1289def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
1290      (instrs
1291    (instregex "VPMSUM(B|H|W|D)$"),
1292    (instregex "V(N)?CIPHER(LAST)?$"),
1293    VSBOX
1294)>;
1295
1296// Branch Instructions
1297
1298// Two Cycle Branch
1299def : InstRW<[P9_BR_2C, DISP_BR_1C],
1300      (instrs
1301  (instregex "BCCCTR(L)?(8)?$"),
1302  (instregex "BCCL(A|R|RL)?$"),
1303  (instregex "BCCTR(L)?(8)?(n)?$"),
1304  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1305  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1306  (instregex "BL(_TLS|_NOP)?(_RM)?$"),
1307  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
1308  (instregex "BLA(8|8_NOP)?(_RM)?$"),
1309  (instregex "BLR(8|L)?$"),
1310  (instregex "TAILB(A)?(8)?$"),
1311  (instregex "TAILBCTR(8)?$"),
1312  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1313  (instregex "BCLR(L)?(n)?$"),
1314  (instregex "BCTR(L)?(8)?(_RM)?$"),
1315  B,
1316  BA,
1317  BC,
1318  BCC,
1319  BCCA,
1320  BCL,
1321  BCLalways,
1322  BCLn,
1323  BCTRL8_LDinto_toc,
1324  BCTRL_LWZinto_toc,
1325  BCTRL8_LDinto_toc_RM,
1326  BCTRL_LWZinto_toc_RM,
1327  BCn,
1328  CTRL_DEP
1329)>;
1330
1331// Five Cycle Branch with a 2 Cycle ALU Op
1332// Operations must be done consecutively and not in parallel.
1333def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
1334      (instrs
1335    ADDPCIS
1336)>;
1337
1338// Special Extracted Instructions For Atomics
1339
1340// Atomic Load
1341def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1342              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1343              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
1344              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
1345      (instrs
1346    (instregex "L(D|W)AT$")
1347)>;
1348
1349// Atomic Store
1350def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1351              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
1352      (instrs
1353    (instregex "ST(D|W)AT$")
1354)>;
1355
1356// Signal Processing Engine (SPE) Instructions
1357// These instructions are not supported on Power 9
1358def : InstRW<[],
1359    (instrs
1360  BRINC,
1361  EVABS,
1362  EVEQV,
1363  EVMRA,
1364  EVNAND,
1365  EVNEG,
1366  (instregex "EVADD(I)?W$"),
1367  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1368  (instregex "EVAND(C)?$"),
1369  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1370  (instregex "EVCNTL(S|Z)W$"),
1371  (instregex "EVDIVW(S|U)$"),
1372  (instregex "EVEXTS(B|H)$"),
1373  (instregex "EVLD(H|W|D)(X)?$"),
1374  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1375  (instregex "EVLWHE(X)?$"),
1376  (instregex "EVLWHO(S|U)(X)?$"),
1377  (instregex "EVLW(H|W)SPLAT(X)?$"),
1378  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1379  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1380  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1381  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1382  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1383  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1384  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1385  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1386  (instregex "EVMWHUMI(A)?$"),
1387  (instregex "EVMWLS(M|S)IA(A|N)W$"),
1388  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1389  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1390  (instregex "EVMWSSF(A|AA|AN)?$"),
1391  (instregex "EVMWUMI(A|AA|AN)?$"),
1392  (instregex "EV(N|X)?OR(C)?$"),
1393  (instregex "EVR(LW|LWI|NDW)$"),
1394  (instregex "EVSLW(I)?$"),
1395  (instregex "EVSPLAT(F)?I$"),
1396  (instregex "EVSRW(I)?(S|U)$"),
1397  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1398  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1399  (instregex "EVSUB(I)?FW$")
1400)> { let Unsupported = 1; }
1401
1402// General Instructions without scheduling support.
1403def : InstRW<[],
1404    (instrs
1405  (instregex "(H)?RFI(D)?$"),
1406  (instregex "DSS(ALL)?$"),
1407  (instregex "DST(ST)?(T)?(64)?$"),
1408  (instregex "ICBL(C|Q)$"),
1409  (instregex "L(W|H|B)EPX$"),
1410  (instregex "ST(W|H|B)EPX$"),
1411  (instregex "(L|ST)FDEPX$"),
1412  (instregex "M(T|F)SR(IN)?$"),
1413  (instregex "M(T|F)DCR$"),
1414  (instregex "NOP_GT_PWR(6|7)$"),
1415  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1416  (instregex "WRTEE(I)?$"),
1417  (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
1418  ATTN,
1419  CLRBHRB,
1420  MFBHRBE,
1421  MBAR,
1422  MSYNC,
1423  SLBSYNC,
1424  SLBFEE_rec,
1425  NAP,
1426  STOP,
1427  TRAP,
1428  RFCI,
1429  RFDI,
1430  RFMCI,
1431  SC,
1432  DCBA,
1433  DCBI,
1434  DCCCI,
1435  ICCCI,
1436  ADDEX,
1437  ADDEX8,
1438  CDTBCD, CDTBCD8,
1439  CBCDTD, CBCDTD8,
1440  ADDG6S, ADDG6S8
1441)> { let Unsupported = 1; }
1442