xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/PowerPC/P9InstrResources.td (revision 82d56013d7b633d116a93943de88e08335357a7c)
1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
24//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
33// Two cycle ALU vector operation that uses an entire superslice.
34// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
35// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
// The instrs list holds the instregex-matched opcode families first, followed
// by individually named opcodes.
36def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
37      (instrs
38    (instregex "VADDU(B|H|W|D)M$"),
39    (instregex "VAND(C)?$"),
40    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
41    (instregex "V_SET0(B|H)?$"),
42    (instregex "VS(R|L)(B|H|W|D)$"),
43    (instregex "VSUBU(B|H|W|D)M$"),
44    (instregex "VPOPCNT(B|H)$"),
45    (instregex "VRL(B|H|W|D)$"),
46    (instregex "VSRA(B|H|W|D)$"),
47    (instregex "XV(N)?ABS(D|S)P$"),
48    (instregex "XVCPSGN(D|S)P$"),
49    (instregex "XV(I|X)EXP(D|S)P$"),
50    (instregex "VRL(D|W)(MI|NM)$"),
51    (instregex "VMRG(E|O)W$"),
52    MTVSRDD,
53    VEQV,
54    VNAND,
55    VNEGD,
56    VNEGW,
57    VNOR,
58    VOR,
59    VORC,
60    VSEL,
61    VXOR,
62    XVNEGDP,
63    XVNEGSP,
64    XXLAND,
65    XXLANDC,
66    XXLEQV,
67    XXLEQVOnes,
68    XXLNAND,
69    XXLNOR,
70    XXLOR,
71    XXLORf,
72    XXLORC,
73    XXLXOR,
74    XXLXORdpz,
75    XXLXORspz,
76    XXLXORz,
77    XXSEL,
78    XSABSQP,
79    XSCPSGNQP,
80    XSIEXPQP,
81    XSNABSQP,
82    XSNEGQP,
83    XSXEXPQP
84)>;
85
86// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
87// single slice. However, since it is Restricted, it requires all 3 dispatches
88// (DISP) for that superslice.
// The restricted dispatch is expressed by the DISP_3SLOTS_1C entry below.
89def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
90      (instrs
91    (instregex "TABORT(D|W)C(I)?$"),
92    (instregex "MTFSB(0|1)$"),
93    (instregex "MFFSC(D)?RN(I)?$"),
94    (instregex "CMPRB(8)?$"),
95    (instregex "TD(I)?$"),
96    (instregex "TW(I)?$"),
97    (instregex "FCMP(O|U)(S|D)$"),
98    (instregex "XSTSTDC(S|D)P$"),
99    FTDIV,
100    FTSQRT,
101    CMPEQB
102)>;
103
104// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
// Uses the same P9_ALU_3C and IP_EXEC_1C entries as the restricted 3 cycle
// group; only the dispatch entry differs (DISP_1C rather than DISP_3SLOTS_1C).
105def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
106      (instrs
107    (instregex "XSMAX(C|J)?DP$"),
108    (instregex "XSMIN(C|J)?DP$"),
109    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
110    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
111    (instregex "POPCNT(D|W)$"),
112    (instregex "CMPB(8)?$"),
113    (instregex "SETB(8)?$"),
114    XSTDIVDP,
115    XSTSQRTDP,
116    XSXSIGDP,
117    XSCVSPDPN,
118    BPERMD
119)>;
120
121// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
// NOTE(review): several patterns in this list overlap. "NEG(8)?(O)?$" matches
// nothing that "NEG(8)?(O)?(_rec)?$" does not already match, OR is matched by
// both "(X)?OR(I|IS)?(8)?(_rec)?$" and "OR(C)?(8)?(_rec)?$", and ADD4/ADD8 are
// matched by more than one ADD pattern. The overlaps look redundant rather
// than conflicting since they all sit in this one group.
122def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
123      (instrs
124    (instregex "S(L|R)D$"),
125    (instregex "SRAD(I)?$"),
126    (instregex "EXTSWSLI_32_64$"),
127    (instregex "MFV(S)?RD$"),
128    (instregex "MTV(S)?RD$"),
129    (instregex "MTV(S)?RW(A|Z)$"),
130    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
131    (instregex "CMP(L)?D(I)?$"),
132    (instregex "SUBF(I)?C(8)?(O)?$"),
133    (instregex "ANDI(S)?(8)?(_rec)?$"),
134    (instregex "ADDC(8)?(O)?$"),
135    (instregex "ADDIC(8)?(_rec)?$"),
136    (instregex "ADD(8|4)(O)?(_rec)?$"),
137    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
138    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
139    (instregex "NEG(8)?(O)?(_rec)?$"),
140    (instregex "POPCNTB$"),
141    (instregex "ADD(I|IS)?(8)?$"),
142    (instregex "LI(S)?(8)?$"),
143    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
144    (instregex "NAND(8)?(_rec)?$"),
145    (instregex "AND(C)?(8)?(_rec)?$"),
146    (instregex "NOR(8)?(_rec)?$"),
147    (instregex "OR(C)?(8)?(_rec)?$"),
148    (instregex "EQV(8)?(_rec)?$"),
149    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
150    (instregex "ADD(4|8)(TLS)?(_)?$"),
151    (instregex "NEG(8)?(O)?$"),
152    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
153    COPY,
154    MCRF,
155    MCRXRX,
156    XSNABSDP,
157    XSXEXPDP,
158    XSABSDP,
159    XSNEGDP,
160    XSCPSGNDP,
161    MFVSRWZ,
162    MFVRWZ,
163    EXTSWSLI,
164    SRADI_32,
165    RLDIC,
166    RFEBB,
167    LA,
168    TBEGIN,
169    TRECHKPT,
170    NOP,
171    WAIT
172)>;
173
174// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
175// single slice. However, since it is Restricted, it requires all 3 dispatches
176// (DISP) for that superslice.
// Note that plain RLDIC is listed in the standard dispatch 2 cycle group; the
// "RLDIC(L|R)..." pattern here only matches names with the trailing L or R.
177def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
178      (instrs
179    (instregex "RLDC(L|R)$"),
180    (instregex "RLWIMI(8)?$"),
181    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
182    (instregex "M(F|T)OCRF(8)?$"),
183    (instregex "CR(6)?(UN)?SET$"),
184    (instregex "CR(N)?(OR|AND)(C)?$"),
185    (instregex "S(L|R)W(8)?$"),
186    (instregex "RLW(INM|NM)(8)?$"),
187    (instregex "F(N)?ABS(D|S)$"),
188    (instregex "FNEG(D|S)$"),
189    (instregex "FCPSGN(D|S)$"),
190    (instregex "SRAW(I)?$"),
191    (instregex "ISEL(8)?$"),
192    RLDIMI,
193    XSIEXPDP,
194    FMR,
195    CREQV,
196    CRXOR,
197    TRECLAIM,
198    TSR,
199    TABORT
200)>;
201
202// Three cycle ALU vector operation that uses an entire superslice.
203// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
204// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
// The _rec variants are covered in two ways: through an optional "(_rec)?" in
// the VCMP*FP patterns, and by explicit name for the remaining VCMP*/XVCMP*
// opcodes further down the list.
205def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
206      (instrs
207    (instregex "M(T|F)VSCR$"),
208    (instregex "VCMPNEZ(B|H|W)$"),
209    (instregex "VCMPEQU(B|H|W|D)$"),
210    (instregex "VCMPNE(B|H|W)$"),
211    (instregex "VABSDU(B|H|W)$"),
212    (instregex "VADDU(B|H|W)S$"),
213    (instregex "VAVG(S|U)(B|H|W)$"),
214    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
215    (instregex "VCMPBFP(_rec)?$"),
216    (instregex "VC(L|T)Z(B|H|W|D)$"),
217    (instregex "VADDS(B|H|W)S$"),
218    (instregex "V(MIN|MAX)FP$"),
219    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
220    VBPERMD,
221    VADDCUW,
222    VPOPCNTW,
223    VPOPCNTD,
224    VPRTYBD,
225    VPRTYBW,
226    VSHASIGMAD,
227    VSHASIGMAW,
228    VSUBSBS,
229    VSUBSHS,
230    VSUBSWS,
231    VSUBUBS,
232    VSUBUHS,
233    VSUBUWS,
234    VSUBCUW,
235    VCMPGTSB,
236    VCMPGTSB_rec,
237    VCMPGTSD,
238    VCMPGTSD_rec,
239    VCMPGTSH,
240    VCMPGTSH_rec,
241    VCMPGTSW,
242    VCMPGTSW_rec,
243    VCMPGTUB,
244    VCMPGTUB_rec,
245    VCMPGTUD,
246    VCMPGTUD_rec,
247    VCMPGTUH,
248    VCMPGTUH_rec,
249    VCMPGTUW,
250    VCMPGTUW_rec,
251    VCMPNEB_rec,
252    VCMPNEH_rec,
253    VCMPNEW_rec,
254    VCMPNEZB_rec,
255    VCMPNEZH_rec,
256    VCMPNEZW_rec,
257    VCMPEQUB_rec,
258    VCMPEQUD_rec,
259    VCMPEQUH_rec,
260    VCMPEQUW_rec,
261    XVCMPEQDP,
262    XVCMPEQDP_rec,
263    XVCMPEQSP,
264    XVCMPEQSP_rec,
265    XVCMPGEDP,
266    XVCMPGEDP_rec,
267    XVCMPGESP,
268    XVCMPGESP_rec,
269    XVCMPGTDP,
270    XVCMPGTDP_rec,
271    XVCMPGTSP,
272    XVCMPGTSP_rec,
273    XVMAXDP,
274    XVMAXSP,
275    XVMINDP,
276    XVMINSP,
277    XVTDIVDP,
278    XVTDIVSP,
279    XVTSQRTDP,
280    XVTSQRTSP,
281    XVTSTDCDP,
282    XVTSTDCSP,
283    XVXSIGDP,
284    XVXSIGSP
285)>;
286
287// 7 cycle DP vector operation that uses an entire superslice.
288// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
289// EXECO) and 1 dispatch (DISP) to the given superslice.
290def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
291      (instrs
292    VADDFP,
293    VCTSXS,
294    VCTSXS_0,
295    VCTUXS,
296    VCTUXS_0,
297    VEXPTEFP,
298    VLOGEFP,
299    VMADDFP,
300    VMHADDSHS,
301    VNMSUBFP,
302    VREFP,
303    VRFIM,
304    VRFIN,
305    VRFIP,
306    VRFIZ,
307    VRSQRTEFP,
308    VSUBFP,
309    XVADDDP,
310    XVADDSP,
311    XVCVDPSP,
312    XVCVDPSXDS,
313    XVCVDPSXWS,
314    XVCVDPUXDS,
315    XVCVDPUXWS,
316    XVCVHPSP,
317    XVCVSPDP,
318    XVCVSPHP,
319    XVCVSPSXDS,
320    XVCVSPSXWS,
321    XVCVSPUXDS,
322    XVCVSPUXWS,
323    XVCVSXDDP,
324    XVCVSXDSP,
325    XVCVSXWDP,
326    XVCVSXWSP,
327    XVCVUXDDP,
328    XVCVUXDSP,
329    XVCVUXWDP,
330    XVCVUXWSP,
331    XVMADDADP,
332    XVMADDASP,
333    XVMADDMDP,
334    XVMADDMSP,
335    XVMSUBADP,
336    XVMSUBASP,
337    XVMSUBMDP,
338    XVMSUBMSP,
339    XVMULDP,
340    XVMULSP,
341    XVNMADDADP,
342    XVNMADDASP,
343    XVNMADDMDP,
344    XVNMADDMSP,
345    XVNMSUBADP,
346    XVNMSUBASP,
347    XVNMSUBMDP,
348    XVNMSUBMSP,
349    XVRDPI,
350    XVRDPIC,
351    XVRDPIM,
352    XVRDPIP,
353    XVRDPIZ,
354    XVREDP,
355    XVRESP,
356    XVRSPI,
357    XVRSPIC,
358    XVRSPIM,
359    XVRSPIP,
360    XVRSPIZ,
361    XVRSQRTEDP,
362    XVRSQRTESP,
363    XVSUBDP,
364    XVSUBSP,
365    VCFSX,
366    VCFSX_0,
367    VCFUX,
368    VCFUX_0,
369    VMHRADDSHS,
370    VMLADDUHM,
371    VMSUMMBM,
372    VMSUMSHM,
373    VMSUMSHS,
374    VMSUMUBM,
375    VMSUMUHM,
376    VMSUMUDM,
377    VMSUMUHS,
378    VMULESB,
379    VMULESH,
380    VMULESW,
381    VMULEUB,
382    VMULEUH,
383    VMULEUW,
384    VMULOSB,
385    VMULOSH,
386    VMULOSW,
387    VMULOUB,
388    VMULOUH,
389    VMULOUW,
390    VMULUWM,
391    VSUM2SWS,
392    VSUM4SBS,
393    VSUM4SHS,
394    VSUM4UBS,
395    VSUMSWS
396)>;
397
398// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
399// dispatch units for the superslice.
// The MADD*/MUL* opcodes matched here are scheduled on a DP unit; the model
// description at the top of this file notes that the DP units also include
// fixed point multiply add.
400def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
401      (instrs
402    (instregex "MADD(HD|HDU|LD|LD8)$"),
403    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
404)>;
405
406// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
407// dispatch units for the superslice.
// Only the non-_rec opcode names are listed here: every pattern ends in "$"
// without an optional _rec suffix. The FSEL*_rec, FRSP_rec and other F*_rec
// variants are given their own cracked entries elsewhere in this file.
408def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
409      (instrs
410    FRSP,
411    (instregex "FRI(N|P|Z|M)(D|S)$"),
412    (instregex "FRE(S)?$"),
413    (instregex "FADD(S)?$"),
414    (instregex "FMSUB(S)?$"),
415    (instregex "FMADD(S)?$"),
416    (instregex "FSUB(S)?$"),
417    (instregex "FCFID(U)?(S)?$"),
418    (instregex "FCTID(U)?(Z)?$"),
419    (instregex "FCTIW(U)?(Z)?$"),
420    (instregex "FRSQRTE(S)?$"),
421    FNMADDS,
422    FNMADD,
423    FNMSUBS,
424    FNMSUB,
425    FSELD,
426    FSELS,
427    FMULS,
428    FMUL,
429    XSMADDADP,
430    XSMADDASP,
431    XSMADDMDP,
432    XSMADDMSP,
433    XSMSUBADP,
434    XSMSUBASP,
435    XSMSUBMDP,
436    XSMSUBMSP,
437    XSMULDP,
438    XSMULSP,
439    XSNMADDADP,
440    XSNMADDASP,
441    XSNMADDMDP,
442    XSNMADDMSP,
443    XSNMSUBADP,
444    XSNMSUBASP,
445    XSNMSUBMDP,
446    XSNMSUBMSP
447)>;
448
449// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
450// These operations can be done in parallel. The DP is restricted so we need a
451// full 4 dispatches.
// The 4 dispatches are the sum of the DISP_3SLOTS_1C and DISP_1C entries below.
452def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
453              DISP_3SLOTS_1C, DISP_1C],
454      (instrs
455    (instregex "FSEL(D|S)_rec$")
456)>;
457
458// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
// NOTE(review): 5 + 2 matches the "7C" in P9_DPOpAndALUOp_7C, which suggests
// the two pieces run back to back - confirm against the resource definition.
459def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
460              DISP_3SLOTS_1C, DISP_1C],
461      (instrs
462    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
463)>;
464
465// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
466// These operations must be done sequentially. The DP is restricted so we need
467// a full 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
468def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
469              DISP_3SLOTS_1C, DISP_1C],
470      (instrs
471    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
472    (instregex "FRE(S)?_rec$"),
473    (instregex "FADD(S)?_rec$"),
474    (instregex "FSUB(S)?_rec$"),
475    (instregex "F(N)?MSUB(S)?_rec$"),
476    (instregex "F(N)?MADD(S)?_rec$"),
477    (instregex "FCFID(U)?(S)?_rec$"),
478    (instregex "FCTID(U)?(Z)?_rec$"),
479    (instregex "FCTIW(U)?(Z)?_rec$"),
480    (instregex "FMUL(S)?_rec$"),
481    (instregex "FRSQRTE(S)?_rec$"),
482    FRSP_rec
483)>;
484
485// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
486def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
487      (instrs
488    XSADDDP,
489    XSADDSP,
490    XSCVDPHP,
491    XSCVDPSP,
492    XSCVDPSXDS,
493    XSCVDPSXDSs,
494    XSCVDPSXWS,
495    XSCVDPUXDS,
496    XSCVDPUXDSs,
497    XSCVDPUXWS,
498    XSCVDPSXWSs,
499    XSCVDPUXWSs,
500    XSCVHPDP,
501    XSCVSPDP,
502    XSCVSXDDP,
503    XSCVSXDSP,
504    XSCVUXDDP,
505    XSCVUXDSP,
506    XSRDPI,
507    XSRDPIC,
508    XSRDPIM,
509    XSRDPIP,
510    XSRDPIZ,
511    XSREDP,
512    XSRESP,
513    XSRSQRTEDP,
514    XSRSQRTESP,
515    XSSUBDP,
516    XSSUBSP,
517    XSCVDPSPN,
518    XSRSP
519)>;
520
521// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
522// superslice. That includes both exec pipelines (EXECO, EXECE) and one
523// dispatch.
// Besides the V*/XX* entries, the tail of this list also names the
// VADD*/VSUB*/VMUL10* ...Q* opcodes, several XS*QP opcodes and the BCD*_rec
// opcodes, all of which share this PM resource description.
524def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
525      (instrs
526    (instregex "LVS(L|R)$"),
527    (instregex "VSPLTIS(W|H|B)$"),
528    (instregex "VSPLT(W|H|B)(s)?$"),
529    (instregex "V_SETALLONES(B|H)?$"),
530    (instregex "VEXTRACTU(B|H|W)$"),
531    (instregex "VINSERT(B|H|W|D)$"),
532    MFVSRLD,
533    MTVSRWS,
534    VBPERMQ,
535    VCLZLSBB,
536    VCTZLSBB,
537    VEXTRACTD,
538    VEXTUBLX,
539    VEXTUBRX,
540    VEXTUHLX,
541    VEXTUHRX,
542    VEXTUWLX,
543    VEXTUWRX,
544    VGBBD,
545    VMRGHB,
546    VMRGHH,
547    VMRGHW,
548    VMRGLB,
549    VMRGLH,
550    VMRGLW,
551    VPERM,
552    VPERMR,
553    VPERMXOR,
554    VPKPX,
555    VPKSDSS,
556    VPKSDUS,
557    VPKSHSS,
558    VPKSHUS,
559    VPKSWSS,
560    VPKSWUS,
561    VPKUDUM,
562    VPKUDUS,
563    VPKUHUM,
564    VPKUHUS,
565    VPKUWUM,
566    VPKUWUS,
567    VPRTYBQ,
568    VSL,
569    VSLDOI,
570    VSLO,
571    VSLV,
572    VSR,
573    VSRO,
574    VSRV,
575    VUPKHPX,
576    VUPKHSB,
577    VUPKHSH,
578    VUPKHSW,
579    VUPKLPX,
580    VUPKLSB,
581    VUPKLSH,
582    VUPKLSW,
583    XXBRD,
584    XXBRH,
585    XXBRQ,
586    XXBRW,
587    XXEXTRACTUW,
588    XXINSERTW,
589    XXMRGHW,
590    XXMRGLW,
591    XXPERM,
592    XXPERMR,
593    XXSLDWI,
594    XXSLDWIs,
595    XXSPLTIB,
596    XXSPLTW,
597    XXSPLTWs,
598    XXPERMDI,
599    XXPERMDIs,
600    VADDCUQ,
601    VADDECUQ,
602    VADDEUQM,
603    VADDUQM,
604    VMUL10CUQ,
605    VMUL10ECUQ,
606    VMUL10EUQ,
607    VMUL10UQ,
608    VSUBCUQ,
609    VSUBECUQ,
610    VSUBEUQM,
611    VSUBUQM,
612    XSCMPEXPQP,
613    XSCMPOQP,
614    XSCMPUQP,
615    XSTSTDCQP,
616    XSXSIGQP,
617    BCDCFN_rec,
618    BCDCFZ_rec,
619    BCDCPSGN_rec,
620    BCDCTN_rec,
621    BCDCTZ_rec,
622    BCDSETSGN_rec,
623    BCDS_rec,
624    BCDTRUNC_rec,
625    BCDUS_rec,
626    BCDUTRUNC_rec
627)>;
628
629// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
630// superslice. That includes both exec pipelines (EXECO, EXECE) and one
631// dispatch.
// Per the model description at the top of this file the DFU is the Decimal
// Floating Point and Quad Precision unit; the list below names one BCD*_rec
// opcode and a set of XS*QP* opcodes.
632def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
633      (instrs
634    BCDSR_rec,
635    XSADDQP,
636    XSADDQPO,
637    XSCVDPQP,
638    XSCVQPDP,
639    XSCVQPDPO,
640    XSCVQPSDZ,
641    XSCVQPSWZ,
642    XSCVQPUDZ,
643    XSCVQPUWZ,
644    XSCVSDQP,
645    XSCVUDQP,
646    XSRQPI,
647    XSRQPIX,
648    XSRQPXP,
649    XSSUBQP,
650    XSSUBQPO
651)>;
652
653// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
654// superslice. That includes both exec pipelines (EXECO, EXECE) and one
655// dispatch.
// BCDCTSQ_rec is the only opcode listed in this latency class.
656def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
657      (instrs
658    BCDCTSQ_rec
659)>;
660
661// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
662// superslice. That includes both exec pipelines (EXECO, EXECE) and one
663// dispatch.
// Each opcode is listed twice: the plain name and the same name with a
// trailing "O".
664def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
665      (instrs
666    XSMADDQP,
667    XSMADDQPO,
668    XSMSUBQP,
669    XSMSUBQPO,
670    XSMULQP,
671    XSMULQPO,
672    XSNMADDQP,
673    XSNMADDQPO,
674    XSNMSUBQP,
675    XSNMSUBQPO
676)>;
677
678// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
679// superslice. That includes both exec pipelines (EXECO, EXECE) and one
680// dispatch.
// BCDCFSQ_rec is the only opcode listed in this latency class.
681def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
682      (instrs
683    BCDCFSQ_rec
684)>;
685
686// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
687// superslice. That includes both exec pipelines (EXECO, EXECE) and one
688// dispatch.
// Covers XSDIVQP and its "O"-suffixed counterpart.
689def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
690      (instrs
691    XSDIVQP,
692    XSDIVQPO
693)>;
694
695// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
696// superslice. That includes both exec pipelines (EXECO, EXECE) and one
697// dispatch.
698def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
699      (instrs
700    XSSQRTQP,
701    XSSQRTQPO
702)>;
703
704// 6 Cycle Load uses a single slice.
// NOTE(review): unlike most patterns in this file, the regex below has no
// trailing "$" anchor - confirm that the broader match is intended.
705def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
706      (instrs
707    (instregex "LXVL(L)?")
708)>;
709
710// 5 Cycle Load uses a single slice.
// NOTE(review): the "LVX(L)?" pattern below has no trailing "$" anchor, unlike
// its neighbours - confirm that the broader match is intended.
711def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
712      (instrs
713    (instregex "LVE(B|H|W)X$"),
714    (instregex "LVX(L)?"),
715    (instregex "LXSI(B|H)ZX$"),
716    LXSDX,
717    LXVB16X,
718    LXVD2X,
719    LXVWSX,
720    LXSIWZX,
721    LXV,
722    LXVX,
723    LXSD,
724    DFLOADf64,
725    XFLOADf64,
726    LIWZX
727)>;
728
729// 4 Cycle Load uses a single slice.
// Besides the L* load opcodes, this group also lists the DCB*/ICB* and CP_*
// opcodes, DARN, and the SYNC-family opcodes named at the end of the list;
// they all share the same load/store resource description.
730def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
731      (instrs
732    (instregex "DCB(F|T|ST)(EP)?$"),
733    (instregex "DCBZ(L)?(EP)?$"),
734    (instregex "DCBTST(EP)?$"),
735    (instregex "CP_COPY(8)?$"),
736    (instregex "ICBI(EP)?$"),
737    (instregex "ICBT(LS)?$"),
738    (instregex "LBARX(L)?$"),
739    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
740    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
741    (instregex "LH(A|B)RX(L)?(8)?$"),
742    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
743    (instregex "LWARX(L)?$"),
744    (instregex "LWBRX(8)?$"),
745    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
746    CP_ABORT,
747    DARN,
748    EnforceIEIO,
749    ISYNC,
750    MSGSYNC,
751    TLBSYNC,
752    SYNC,
753    LMW,
754    LSWI
755)>;
756
757// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
758// superslice.
// Same P9_LS_4C and IP_AGEN_1C entries as the standard 4 cycle load group;
// the whole-superslice dispatch is the DISP_3SLOTS_1C entry.
759def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
760      (instrs
761    LFIWZX,
762    LFDX,
763    LFD
764)>;
765
766// Cracked Load Instructions.
767// Load instructions that can be done in parallel.
// Modeled as two P9_LS_4C pieces on two AGEN pipelines with a single
// DISP_PAIR_1C dispatch entry.
768def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
769              DISP_PAIR_1C],
770      (instrs
771    SLBIA,
772    SLBIE,
773    SLBMFEE,
774    SLBMFEV,
775    SLBMTE,
776    TLBIEL
777)>;
778
779// Cracked Load Instruction.
780// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
781// operations can be run in parallel.
// NOTE(review): the list below has separate P9_LS_4C and P9_ALU_2C entries;
// "totaling 6 cycles" is the sum of those two latencies even though the pieces
// are described as running in parallel.
782def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
783              DISP_PAIR_1C, DISP_PAIR_1C],
784      (instrs
785    (instregex "L(W|H)ZU(X)?(8)?$")
786)>;
787
788// Cracked TEND Instruction.
789// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
790// operations can be run in parallel.
// Same unit and pipeline entries as the cracked L(W|H)ZU loads, but with two
// DISP_1C dispatch entries instead of two DISP_PAIR_1C entries.
791def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
792              DISP_1C, DISP_1C],
793      (instrs
794    TEND
795)>;
796
797
798// Cracked Store Instruction
799// Consecutive Store and ALU instructions. The store is restricted and requires
800// three dispatches.
// The dispatch entries are DISP_3SLOTS_1C (the restricted store) and DISP_1C
// (presumably the ALU piece); two EXEC pipelines and one AGEN pipeline are
// listed.
801def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
802              DISP_3SLOTS_1C, DISP_1C],
803      (instrs
804    (instregex "ST(B|H|W|D)CX$")
805)>;
806
807// Cracked Load Instruction.
808// Two consecutive load operations for a total of 8 cycles.
// Two AGEN pipelines and two DISP_1C dispatch entries are listed, presumably
// one of each per load piece.
809def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
810              DISP_1C, DISP_1C],
811      (instrs
812    LDMX
813)>;
814
815// Cracked Load instruction.
816// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
817// operations cannot be done at the same time and so their latencies are added.
// One EXEC pipeline, one AGEN pipeline and two DISP_1C dispatch entries are
// listed. CP_PASTE*_rec and TCHECK share this description with the LHA*/LWA*
// loads.
818def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
819              DISP_1C, DISP_1C],
820      (instrs
821    (instregex "LHA(X)?(8)?$"),
822    (instregex "CP_PASTE(8)?_rec$"),
823    (instregex "LWA(X)?(_32)?$"),
824    TCHECK
825)>;
826
827// Cracked Restricted Load instruction.
828// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
829// operations cannot be done at the same time and so their latencies are added.
830// Full 6 dispatches are required as this is both cracked and restricted.
// The 6 dispatches correspond to the two DISP_3SLOTS_1C entries below.
831def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
832              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
833      (instrs
834    LFIWAX
835)>;
836
837// Cracked Load instruction.
838// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
839// operations cannot be done at the same time and so their latencies are added.
840// Full 4 dispatches are required as this is a cracked instruction.
// NOTE(review): the list below carries two DISP_1C entries, which does not
// obviously add up to the "4 dispatches" stated above - confirm whether the
// comment is stale.
841def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
842      (instrs
843    LXSIWAX,
844    LIWAX
845)>;
846
847// Cracked Load instruction.
848// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
849// cycles. The Load and ALU operations cannot be done at the same time and so
850// their latencies are added.
851// Full 6 dispatches are required as this is a restricted instruction.
// The 6 dispatches correspond to the two DISP_3SLOTS_1C entries below.
852def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
853              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
854      (instrs
855    LFSX,
856    LFS
857)>;
858
859// Cracked Load instruction.
860// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
861// operations cannot be done at the same time and so their latencies are added.
862// Full 4 dispatches are required as this is a cracked instruction.
// NOTE(review): the list below carries two DISP_1C entries, which does not
// obviously add up to the "4 dispatches" stated above - confirm whether the
// comment is stale.
863def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
864      (instrs
865    LXSSP,
866    LXSSPX,
867    XFLOADf32,
868    DFLOADf32
869)>;
870
871// Cracked 3-Way Load Instruction
872// Load with two ALU operations that depend on each other
// Modeled as P9_LoadAndALUOp_6C plus a separate P9_ALU_2C, with one AGEN and
// two EXEC pipelines and three dispatch entries (two DISP_PAIR_1C and one
// DISP_1C).
873def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
874              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
875      (instrs
876    (instregex "LHAU(X)?(8)?$"),
877    LWAUX
878)>;
879
880// Cracked Load that requires the PM resource.
881// Since the Load and the PM cannot be done at the same time the latencies are
882// added. Requires 8 cycles. Since the PM requires the full superslice we need
883// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
884// requires the remaining 1 dispatch.
// In the list below the load contributes IP_AGEN_1C and the PM contributes
// IP_EXECE_1C and IP_EXECO_1C; the two DISP_1C entries are the two dispatches.
885def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
886              DISP_1C, DISP_1C],
887      (instrs
888    LXVH8X,
889    LXVDSX,
890    LXVW4X
891)>;
892
893// Single slice Restricted store operation. The restricted operation requires
894// all three dispatches for the superslice.
// Uses both an EXEC and an AGEN pipeline. SLBIEG and TLBIE are listed here
// alongside the ST* store opcodes.
895def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
896      (instrs
897    (instregex "STF(S|D|IWX|SX|DX)$"),
898    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
899    (instregex "STW(8)?$"),
900    (instregex "(D|X)FSTORE(f32|f64)$"),
901    (instregex "ST(W|H|D)BRX$"),
902    (instregex "ST(B|H|D)(8)?$"),
903    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
904    STIWX,
905    SLBIEG,
906    STMW,
907    STSWI,
908    TLBIE
909)>;
910
911// Vector Store Instruction
912// Requires the whole superslice and therefore requires one dispatch
913// as well as both the Even and Odd exec pipelines.
// The suffix group in the STXV pattern is optional, so plain STXV is matched
// as well as the suffixed forms.
914def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
915      (instrs
916    (instregex "STVE(B|H|W)X$"),
917    (instregex "STVX(L)?$"),
918    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
919)>;
920
921// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
922// superslice. That includes both exec pipelines (EXECO, EXECE) and two
923// dispatches.
// The opcodes matched here are the MTCTR*/MTLR* names rather than DIV* names;
// they are modeled on the DIV unit. The dispatch entry is DISP_EVEN_1C.
924def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
925      (instrs
926    (instregex "MTCTR(8)?(loop)?$"),
927    (instregex "MTLR(8)?$")
928)>;
929
930// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
931// superslice. That includes both exec pipelines (EXECO, EXECE) and two
932// dispatches.
// The opcodes matched here are MT*/MF* names (VRSAVE, PMR, TB, SPR, CTR, LR,
// MSR) rather than DIV* names; they are modeled on the DIV unit.
933def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
934      (instrs
935    (instregex "M(T|F)VRSAVE(v)?$"),
936    (instregex "M(T|F)PMR$"),
937    (instregex "M(T|F)TB(8)?$"),
938    (instregex "MF(SPR|CTR|LR)(8)?$"),
939    (instregex "M(T|F)MSR(D)?$"),
940    (instregex "MTSPR(8)?$")
941)>;
942
943// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
944// superslice. That includes both exec pipelines (EXECO, EXECE) and two
945// dispatches.
// NOTE(review): the meaning of the "_8" suffix in P9_DIV_16C_8 is not
// explained in this file - confirm against the resource definition.
946def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
947      (instrs
948    DIVW,
949    DIVWO,
950    DIVWU,
951    DIVWUO,
952    MODSW
953)>;
954
955// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
956// superslice. That includes both exec pipelines (EXECO, EXECE) and two
957// dispatches.
// Note that MODUW is listed in this 24 cycle group while MODSW is listed in
// the 16 cycle group.
958def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
959      (instrs
960    DIVWE,
961    DIVWEO,
962    DIVD,
963    DIVDO,
964    DIVWEU,
965    DIVWEUO,
966    DIVDU,
967    DIVDUO,
968    MODSD,
969    MODUD,
970    MODUW
971)>;
972
973// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
974// superslice. That includes both exec pipelines (EXECO, EXECE) and two
975// dispatches.
976def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
977      (instrs
978    DIVDE,
979    DIVDEO,
980    DIVDEU,
981    DIVDEUO
982)>;
983
984// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
985// and one full superslice for the DIV operation since there is only one DIV per
986// superslice. Latency of DIV plus ALU is 18.
987def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
988              DISP_EVEN_1C, DISP_1C],
989      (instrs
990    (instregex "DIVW(U)?(O)?_rec$")
991)>;
992
993// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
994// and one full superslice for the DIV operation since there is only one DIV per
995// superslice. Latency of DIV plus ALU is 26.
// These are the _rec counterparts of the DIVD*/DIVWE* opcodes listed in the
// 24 cycle DIV group.
996def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
997              DISP_EVEN_1C, DISP_1C],
998      (instrs
999    DIVD_rec,
1000    DIVDO_rec,
1001    DIVDU_rec,
1002    DIVDUO_rec,
1003    DIVWE_rec,
1004    DIVWEO_rec,
1005    DIVWEU_rec,
1006    DIVWEUO_rec
1007)>;
1008
1009// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
1010// and one full superslice for the DIV operation since there is only one DIV per
1011// superslice. Latency of DIV plus ALU is 42.
// These are the _rec counterparts of the DIVDE* opcodes listed in the
// 40 cycle DIV group.
1012def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
1013              DISP_EVEN_1C, DISP_1C],
1014      (instrs
1015    DIVDE_rec,
1016    DIVDEO_rec,
1017    DIVDEU_rec,
1018    DIVDEUO_rec
1019)>;
1020
1021// CR access instructions in IIC_BrMCR, IIC_BrMCRX.
1022
1023// Cracked, restricted, ALU operations.
1024// Here the two ALU ops can actually be done in parallel and therefore the
1025// latencies are not added together. Otherwise this is like having two
1026// instructions running together on two pipelines and 6 dispatches. ALU ops are
1027// 2 cycles each.
// The 6 dispatches correspond to the two DISP_3SLOTS_1C entries below.
1028def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1029              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1030      (instrs
1031    MTCRF,
1032    MTCRF8
1033)>;
1034
1035// Cracked ALU operations.
1036// Here the two ALU ops can actually be done in parallel and therefore the
1037// latencies are not added together. Otherwise this is like having two
1038// instructions running together on two pipelines and 2 dispatches. ALU ops are
1039// 2 cycles each.
// Only the _rec names are matched here; the non-_rec ADDC*/SUBFC* names are
// matched by patterns in the standard dispatch 2 cycle ALU group.
1040def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1041              DISP_1C, DISP_1C],
1042      (instrs
1043    (instregex "ADDC(8)?(O)?_rec$"),
1044    (instregex "SUBFC(8)?(O)?_rec$")
1045)>;
1046
1047// Cracked ALU operations.
1048// Two ALU ops can be done in parallel.
1049// One is three cycle ALU the other is a two cycle ALU.
1050// One of the ALU ops is restricted the other is not so we have a total of
1051// 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
1052def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1053              DISP_3SLOTS_1C, DISP_1C],
1054      (instrs
1055    (instregex "F(N)?ABS(D|S)_rec$"),
1056    (instregex "FCPSGN(D|S)_rec$"),
1057    (instregex "FNEG(D|S)_rec$"),
1058    FMR_rec
1059)>;
1060
1061// Cracked ALU operations.
1062// Here the two ALU ops can actually be done in parallel and therefore the
1063// latencies are not added together. Otherwise this is like having two
1064// instructions running together on two pipelines and 2 dispatches.
1065// ALU ops are 3 cycles each.
// The 2 dispatches correspond to the two DISP_1C entries below.
1066def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1067              DISP_1C, DISP_1C],
1068      (instrs
1069    MCRFS
1070)>;
1071
1072// Cracked Restricted ALU operations.
1073// Here the two ALU ops can actually be done in parallel and therefore the
1074// latencies are not added together. Otherwise this is like having two
1075// instructions running together on two pipelines and 6 dispatches.
1076// ALU ops are 3 cycles each.
// The suffix groups in both patterns are optional, so plain MTFSF and MTFSFI
// are matched together with their suffixed forms.
1077def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1078              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1079      (instrs
1080    (instregex "MTFSF(b|_rec)?$"),
1081    (instregex "MTFSFI(_rec)?$")
1082)>;
1083
1084// Cracked instruction made of two ALU ops.
1085// The two ops cannot be done in parallel.
1086// One of the ALU ops is restricted and takes 3 dispatches.
// The restricted op is the DISP_3SLOTS_1C entry; the other op is the DISP_1C
// entry.
1087def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
1088              DISP_3SLOTS_1C, DISP_1C],
1089      (instrs
1090    (instregex "RLD(I)?C(R|L)_rec$"),
1091    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
1092    (instregex "SLW(8)?_rec$"),
1093    (instregex "SRAW(I)?_rec$"),
1094    (instregex "SRW(8)?_rec$"),
1095    RLDICL_32_rec,
1096    RLDIMI_rec
1097)>;
1098
1099// Cracked instruction made of two ALU ops.
1100// The two ops cannot be done in parallel.
1101// Both of the ALU ops are restricted and take 3 dispatches each.
// The suffix group in the pattern is optional, so plain MFFS is matched as
// well as MFFSL, MFFSCE and MFFS_rec.
1102def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
1103              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1104      (instrs
1105    (instregex "MFFS(L|CE|_rec)?$")
1106)>;
1107
1108// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
1109// total of 6 cycles. All of the ALU operations are also restricted so each
1110// takes 3 dispatches for a total of 9.
1111def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
1112              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1113      (instrs
1114    (instregex "MFCR(8)?$")
1115)>;
1116
1117// Cracked instruction made of two ALU ops.
1118// The two ops cannot be done in parallel. Each op takes a single dispatch.
1119def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
1120      (instrs
1121    (instregex "EXTSWSLI_32_64_rec$"),
1122    (instregex "SRAD(I)?_rec$"),
1123    EXTSWSLI_rec,
1124    SLD_rec,
1125    SRD_rec,
1126    RLDIC_rec
1127)>;
1128
1129// 33 Cycle restricted DP instruction. Takes one slice and 3 dispatches.
1130def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
1131      (instrs
1132    FDIV
1133)>;
1134
1135// 33 Cycle restricted DP instruction cracked with a 3 cycle ALU op.
1136def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
1137              DISP_3SLOTS_1C, DISP_1C],
1138      (instrs
1139    FDIV_rec
1140)>;
1141
1142// 36 Cycle DP Instruction.
1143// Instruction can be done on a single slice with a single dispatch.
1144def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
1145      (instrs
1146    XSSQRTDP
1147)>;
1148
1149// 36 Cycle restricted DP instruction. Takes one slice and 3 dispatches.
1150def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
1151      (instrs
1152    FSQRT
1153)>;
1154
1155// 36 Cycle DP Vector Instruction. Uses both EXECE and EXECO pipelines.
1156def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
1157              DISP_1C],
1158      (instrs
1159    XVSQRTDP
1160)>;
1161
1162// 27 Cycle DP Vector Instruction. Uses both EXECE and EXECO pipelines.
1163def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
1164              DISP_1C],
1165      (instrs
1166    XVSQRTSP
1167)>;
1168
1169// 36 Cycle restricted DP instruction cracked with a 3 cycle ALU op.
1170def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
1171              DISP_3SLOTS_1C, DISP_1C],
1172      (instrs
1173    FSQRT_rec
1174)>;
1175
1176// 26 Cycle DP Instruction. Takes one slice and 1 dispatch.
1177def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
1178      (instrs
1179    XSSQRTSP
1180)>;
1181
1182// 26 Cycle restricted DP instruction. Takes one slice and 3 dispatches.
1183def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1184      (instrs
1185    FSQRTS
1186)>;
1187
1188// 26 Cycle restricted DP instruction cracked with a 3 cycle ALU op.
1189def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
1190              DISP_3SLOTS_1C, DISP_1C],
1191      (instrs
1192    FSQRTS_rec
1193)>;
1194
1195// 33 Cycle DP Instruction. Takes one slice and 1 dispatch (DISP_1C).
1196def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
1197      (instrs
1198    XSDIVDP
1199)>;
1200
1201// 22 Cycle restricted DP instruction. Takes one slice and 3 dispatches.
1202def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1203      (instrs
1204    FDIVS
1205)>;
1206
1207// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1208def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
1209              DISP_3SLOTS_1C, DISP_1C],
1210      (instrs
1211    FDIVS_rec
1212)>;
1213
1214// 22 Cycle DP Instruction. Takes one slice and 1 dispatch (DISP_1C).
1215def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
1216      (instrs
1217    XSDIVSP
1218)>;
1219
1220// 24 Cycle DP Vector Instruction. Takes one full superslice:
1221// both the EXECE and EXECO pipelines and 1 dispatch for the given
1222// superslice.
1223def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
1224              DISP_1C],
1225      (instrs
1226    XVDIVSP
1227)>;
1228
1229// 33 Cycle DP Vector Instruction. Takes one full superslice:
1230// both the EXECE and EXECO pipelines and 1 dispatch for the given
1231// superslice.
1232def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
1233              DISP_1C],
1234      (instrs
1235    XVDIVDP
1236)>;
1237
1238// Instruction cracked into three pieces. One Load and two ALU operations.
1239// The Load and one of the ALU ops cannot be run at the same time and so the
1240// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
1241// Both the load and the ALU that depends on it are restricted and so they take
1242// 3 dispatches each. The final dispatch comes from the second ALU op (7 total).
1243// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
1244def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
1245              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1246              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
1247      (instrs
1248    (instregex "LF(SU|SUX)$")
1249)>;
1250
1251// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1252// the store and so it can be run at the same time as the store. The store is
1253// also restricted (3 dispatches); the ALU takes 1 dispatch.
1254def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1255              DISP_3SLOTS_1C, DISP_1C],
1256      (instrs
1257    (instregex "STF(S|D)U(X)?$"),
1258    (instregex "ST(B|H|W|D)U(X)?(8)?$")
1259)>;
1260
1261// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1262// the load and so it can run at the same time. Both use DISP_PAIR_1C.
1263def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1264              DISP_PAIR_1C, DISP_PAIR_1C],
1265      (instrs
1266    (instregex "LBZU(X)?(8)?$"),
1267    (instregex "LDU(X)?$")
1268)>;
1269
1270// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1271// the load and so it can be run at the same time as the load. The load is also
1272// restricted. 3 dispatches are from the restricted load while the other one
1273// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
1274// is required for the ALU.
1275def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1276              DISP_3SLOTS_1C, DISP_1C],
1277      (instrs
1278    (instregex "LF(DU|DUX)$")
1279)>;
1280
1281// Crypto Instructions
1282
1283// 6 Cycle CY operation. There is only one CY unit per CPU, so we use a whole
1284// superslice. That includes both exec pipelines (EXECO, EXECE) and one
1285// dispatch.
1286def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
1287      (instrs
1288    (instregex "VPMSUM(B|H|W|D)$"),
1289    (instregex "V(N)?CIPHER(LAST)?$"),
1290    VSBOX
1291)>;
1292
1293// Branch Instructions
1294
1295// Two Cycle Branch. Uses the branch dispatch resource (DISP_BR_1C).
1296def : InstRW<[P9_BR_2C, DISP_BR_1C],
1297      (instrs
1298  (instregex "BCCCTR(L)?(8)?$"),
1299  (instregex "BCCL(A|R|RL)?$"),
1300  (instregex "BCCTR(L)?(8)?(n)?$"),
1301  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1302  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1303  (instregex "BL(_TLS|_NOP)?$"),
1304  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
1305  (instregex "BLA(8|8_NOP)?$"),
1306  (instregex "BLR(8|L)?$"),
1307  (instregex "TAILB(A)?(8)?$"),
1308  (instregex "TAILBCTR(8)?$"),
1309  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1310  (instregex "BCLR(L)?(n)?$"),
1311  (instregex "BCTR(L)?(8)?$"),
1312  B,
1313  BA,
1314  BC,
1315  BCC,
1316  BCCA,
1317  BCL,
1318  BCLalways,
1319  BCLn,
1320  BCTRL8_LDinto_toc,
1321  BCTRL_LWZinto_toc,
1322  BCn,
1323  CTRL_DEP
1324)>;
1325
1326// Five Cycle Branch with a 2 Cycle ALU Op (7 cycles in total).
1327// Operations must be done consecutively and not in parallel.
1328def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
1329      (instrs
1330    ADDPCIS
1331)>;
1332
1333// Special Extracted Instructions For Atomics
1334
1335// Atomic Load (LDAT, LWAT)
1336def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1337              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1338              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
1339              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
1340      (instrs
1341    (instregex "L(D|W)AT$")
1342)>;
1343
1344// Atomic Store (STDAT, STWAT)
1345def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1346              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
1347      (instrs
1348    (instregex "ST(D|W)AT$")
1349)>;
1350
1351// Signal Processing Engine (SPE) Instructions
1352// These instructions are not supported on Power 9 (marked Unsupported).
1353def : InstRW<[],
1354    (instrs
1355  BRINC,
1356  EVABS,
1357  EVEQV,
1358  EVMRA,
1359  EVNAND,
1360  EVNEG,
1361  (instregex "EVADD(I)?W$"),
1362  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1363  (instregex "EVAND(C)?$"),
1364  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1365  (instregex "EVCNTL(S|Z)W$"),
1366  (instregex "EVDIVW(S|U)$"),
1367  (instregex "EVEXTS(B|H)$"),
1368  (instregex "EVLD(H|W|D)(X)?$"),
1369  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1370  (instregex "EVLWHE(X)?$"),
1371  (instregex "EVLWHO(S|U)(X)?$"),
1372  (instregex "EVLW(H|W)SPLAT(X)?$"),
1373  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1374  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1375  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1376  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1377  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1378  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1379  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1380  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1381  (instregex "EVMWHUMI(A)?$"),
1382  (instregex "EVMWLS(M|S)IA(A|N)W$"),
1383  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1384  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1385  (instregex "EVMWSSF(A|AA|AN)?$"),
1386  (instregex "EVMWUMI(A|AA|AN)?$"),
1387  (instregex "EV(N|X)?OR(C)?$"),
1388  (instregex "EVR(LW|LWI|NDW)$"),
1389  (instregex "EVSLW(I)?$"),
1390  (instregex "EVSPLAT(F)?I$"),
1391  (instregex "EVSRW(I)?(S|U)$"),
1392  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1393  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1394  (instregex "EVSUB(I)?FW$")
1395)> { let Unsupported = 1; }
1396
1397// General instructions without scheduling support (marked Unsupported).
1398def : InstRW<[],
1399    (instrs
1400  (instregex "(H)?RFI(D)?$"),
1401  (instregex "DSS(ALL)?$"),
1402  (instregex "DST(ST)?(T)?(64)?$"),
1403  (instregex "ICBL(C|Q)$"),
1404  (instregex "L(W|H|B)EPX$"),
1405  (instregex "ST(W|H|B)EPX$"),
1406  (instregex "(L|ST)FDEPX$"),
1407  (instregex "M(T|F)SR(IN)?$"),
1408  (instregex "M(T|F)DCR$"),
1409  (instregex "NOP_GT_PWR(6|7)$"),
1410  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1411  (instregex "WRTEE(I)?$"),
1412  (instregex "HASH(ST|STP|CHK|CHKP)$"),
1413  ATTN,
1414  CLRBHRB,
1415  MFBHRBE,
1416  MBAR,
1417  MSYNC,
1418  SLBSYNC,
1419  SLBFEE_rec,
1420  NAP,
1421  STOP,
1422  TRAP,
1423  RFCI,
1424  RFDI,
1425  RFMCI,
1426  SC,
1427  DCBA,
1428  DCBI,
1429  DCCCI,
1430  ICCCI
1431)> { let Unsupported = 1; }
1432