xref: /llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td (revision 90d79ca4c769ac3e28ec4b60dd82e6a5bb5e0aae)
1//==- RISCVSchedSiFiveP400.td - SiFiveP400 Scheduling Defs ---*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10
/// c is true if mx has the worst case behavior compared to LMULs in MxList.
/// On the SiFiveP400, the worst case LMUL is the Largest LMUL
/// and the worst case sew is the smallest SEW for that LMUL.
class SiFiveP400IsWorstCaseMX<string mx, list<string> MxList> {
  // Largest LMUL string (e.g. "M8") present in MxList.
  string LLMUL = LargestLMUL<MxList>.r;
  // True iff this mx is that largest LMUL.
  bit c = !eq(mx, LLMUL);
}
18
/// Like SiFiveP400IsWorstCaseMX, but additionally requires sew to be the
/// smallest SEW valid for mx (isF selects the floating-point SEW set).
class SiFiveP400IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
  string LLMUL = LargestLMUL<MxList>.r;
  int SSEW = SmallestSEW<mx, isF>.r;
  // Worst case iff mx is the largest LMUL and sew is its smallest SEW.
  bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
}
24
// 1 Micro-Op per cycle.
// Cycle/micro-op count for a given LMUL: integral LMULs cost LMUL
// cycles, fractional LMULs cost a single cycle.
class SiFiveP400GetLMulCycles<string mx> {
  int c = !cond(
    // Fractional LMULs all complete in a single cycle.
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    // Integral LMULs take LMUL cycles.
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 2,
    !eq(mx, "M4") : 4,
    !eq(mx, "M8") : 8
  );
}
37
// Latency for segmented loads and stores are calculated as vl * nf.
class SiFiveP400GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 128;
  // Bits held by a register group at this LMUL (VLEN scaled by LMUL).
  defvar GroupBits = !cond(
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
  // Upper bound on VL: the number of SEW-bit elements in the group.
  defvar VLUpperBound = !div(GroupBits, sew);
  int c = !mul(VLUpperBound, nf);
}
52
// Both variants of floating point vector reductions are based on numbers collected
// from llvm-exegesis.
class VFReduceBaseCycles<int sew> {
  // The latency for simple unordered VFReduce is `C + 6 * log2(LMUL)`,
  // and `C * LMUL` for ordered VFReduce. This helper class provides the `C`.
  // Wider SEWs have smaller base costs.
  int val = !cond(!eq(sew, 64): 6,
                  !eq(sew, 32): 10,
                  !eq(sew, 16): 16);
}
62
class AdvancedVFReduceCycles<int sew, string mx> {
  // SEW = 64 has lower latencies and RThroughputs than other SEWs, so
  // every entry is written as a (SEW=64, narrower-SEW) pair keyed off
  // this predicate.
  defvar IsSEW64 = !eq(sew, 64);
  int latency = !if(IsSEW64,
                    !cond(!eq(mx, "M1"): 4, !eq(mx, "M2"): 6,
                          !eq(mx, "M4"): 8, !eq(mx, "M8"): 11, true: 4),
                    !cond(!eq(mx, "M1"): 6, !eq(mx, "M2"): 8,
                          !eq(mx, "M4"): 10, !eq(mx, "M8"): 13, true: 6));
  int rthroughput = !if(IsSEW64,
                        !cond(!eq(mx, "M1"): 2, !eq(mx, "M2"): 3,
                              !eq(mx, "M4"): 5, !eq(mx, "M8"): 10, true: 2),
                        !cond(!eq(mx, "M1"): 3, !eq(mx, "M2"): 4,
                              !eq(mx, "M4"): 6, !eq(mx, "M8"): 12, true: 3));
}
76
// Both variants of integer vector reductions are based on numbers collected
// from llvm-exegesis.
// TODO: Fractional LMUL's latency and rthroughput.
class SimpleVIReduceCycles<string mx> {
  defvar LMul = SiFiveP400GetLMulCycles<mx>.c;
  // Latency scales linearly with LMUL.
  int latency = !mul(2, LMul);
  // RThroughput tracks the LMUL cycle count (1 for fractional LMULs),
  // except M8 which costs one extra cycle.
  int rthroughput = !if(!eq(mx, "M8"), 9, LMul);
}
90
class AdvancedVIReduceCycles<int sew, string mx> {
  defvar Log2SEW = !logtwo(sew);
  // `C - 2 * log2(SEW)`, where `C` = 16.1, 18.1, 20.1, and 23.8 for
  // M1/2/4/8, respectively.
  int latency = !cond(!eq(mx, "M1"): !sub(16, !mul(2, Log2SEW)),
                      !eq(mx, "M2"): !sub(18, !mul(2, Log2SEW)),
                      !eq(mx, "M4"): !sub(20, !mul(2, Log2SEW)),
                      !eq(mx, "M8"): !sub(23, !mul(2, Log2SEW)),
                      true: 4);
  int rthroughput = !cond(
                      // `8.3 - 1.02 * log2(SEW)`
                      !eq(mx, "M1"): !sub(8, Log2SEW),
                      // `10.0 - 1.16 * log2(SEW)`. Note that `9 - log2(SEW)`
                      // is closer to the floor value of the original formula.
                      !eq(mx, "M2"): !sub(9, Log2SEW),
                      // `14.2 - 1.53 * log2(SEW)`, scaled by 100 to stay in
                      // integer arithmetic.
                      !eq(mx, "M4"): !div(!sub(1420, !mul(153, Log2SEW)), 100),
                      // `24.1 - 2.3 * log2(SEW)`, scaled by 10.
                      !eq(mx, "M8"): !div(!sub(241, !mul(23, Log2SEW)), 10),
                      true: 1);
}
111
class SiFiveP400VSM3CCycles<string mx> {
  // c = ceil(LMUL / 2), computed from the LMUL cycle count so the table
  // and the formula cannot drift apart. Fractional LMULs and M1 yield 1.
  int c = !div(!add(SiFiveP400GetLMulCycles<mx>.c, 1), 2);
}
119
def SiFiveP400Model : SchedMachineModel {
  let IssueWidth = 3;         // 3 micro-ops are dispatched per cycle.
  let MicroOpBufferSize = 56; // Max micro-ops that can be buffered.
  let LoadLatency = 4;        // Cycles for loads to access the cache.
  let MispredictPenalty = 9;  // Extra cycles for a mispredicted branch.
  // Scalar-crypto (Zk*/Zks*), Zbk* and Zcmt instructions are not
  // scheduled by this model.
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                             HasStdExtZkr];
  let CompleteModel = false;
}
131
// The SiFiveP400 microarchitecture has 6 pipelines:
// Three pipelines for integer operations.
// One pipeline for FPU operations.
// One pipeline for Load operations.
// One pipeline for Store operations.
let SchedModel = SiFiveP400Model in {

// Three single-issue integer execution queues.
def SiFiveP400IEXQ0       : ProcResource<1>;
def SiFiveP400IEXQ1       : ProcResource<1>;
def SiFiveP400IEXQ2       : ProcResource<1>;
// Floating-point execution queue.
def SiFiveP400FEXQ0       : ProcResource<1>;
def SiFiveP400Load        : ProcResource<1>;
def SiFiveP400Store       : ProcResource<1>;

// Plain ALU ops may issue to any of the three integer queues.
def SiFiveP400IntArith    : ProcResGroup<[SiFiveP400IEXQ0, SiFiveP400IEXQ1, SiFiveP400IEXQ2]>;
// Aliases naming the queue that hosts each specialized unit.
defvar SiFiveP400Branch   = SiFiveP400IEXQ0;
defvar SiFiveP400SYS      = SiFiveP400IEXQ1;
defvar SiFiveP400MulDiv   = SiFiveP400IEXQ2;
defvar SiFiveP400I2F      = SiFiveP400IEXQ2;
// Scalar integer divider; occupied for many cycles via ReleaseAtCycles.
def SiFiveP400Div         : ProcResource<1>;

defvar SiFiveP400FloatArith  = SiFiveP400FEXQ0;
defvar SiFiveP400F2I      = SiFiveP400FEXQ0;
// Scalar FP divide/sqrt unit; occupied via ReleaseAtCycles.
def SiFiveP400FloatDiv    : ProcResource<1>;

// Vector pipeline
def SiFiveP400VEXQ0        : ProcResource<1>;
def SiFiveP400VLD          : ProcResource<1>;
def SiFiveP400VST          : ProcResource<1>;
def SiFiveP400VDiv         : ProcResource<1>;
def SiFiveP400VFloatDiv    : ProcResource<1>;
163
// Single-cycle integer ALU and branch operations.
let Latency = 1 in {
// Integer arithmetic and logic
def : WriteRes<WriteIALU, [SiFiveP400IntArith]>;
def : WriteRes<WriteIALU32, [SiFiveP400IntArith]>;
def : WriteRes<WriteShiftImm, [SiFiveP400IntArith]>;
def : WriteRes<WriteShiftImm32, [SiFiveP400IntArith]>;
def : WriteRes<WriteShiftReg, [SiFiveP400IntArith]>;
def : WriteRes<WriteShiftReg32, [SiFiveP400IntArith]>;
// Branching (handled by the branch queue, IEXQ0)
def : WriteRes<WriteJmp, [SiFiveP400Branch]>;
def : WriteRes<WriteJal, [SiFiveP400Branch]>;
def : WriteRes<WriteJalr, [SiFiveP400Branch]>;
}
177
// CMOV
// The conditional-move pseudo cracks into two micro-ops, occupying both
// the branch queue (IEXQ0) and IEXQ1.
def P400WriteCMOV : SchedWriteRes<[SiFiveP400Branch, SiFiveP400IEXQ1]> {
  let Latency = 2;
  let NumMicroOps = 2;
}
def : InstRW<[P400WriteCMOV], (instrs PseudoCCMOVGPRNoX0)>;
184
// Two-cycle operations on the multiply/divide queue (IEXQ2).
let Latency = 2 in {
// Integer multiplication
def : WriteRes<WriteIMul, [SiFiveP400MulDiv]>;
def : WriteRes<WriteIMul32, [SiFiveP400MulDiv]>;
// cpop[w] look exactly like multiply.
def : WriteRes<WriteCPOP, [SiFiveP400MulDiv]>;
def : WriteRes<WriteCPOP32, [SiFiveP400MulDiv]>;
}
193
// Integer division
// The issue queue is busy one cycle; the non-pipelined divider is held
// for the remaining Latency-1 cycles (ReleaseAtCycles).
def : WriteRes<WriteIDiv, [SiFiveP400MulDiv, SiFiveP400Div]> {
  let Latency = 35;
  let ReleaseAtCycles = [1, 34];
}
// 32-bit divides are cheaper than 64-bit ones.
def : WriteRes<WriteIDiv32, [SiFiveP400MulDiv, SiFiveP400Div]> {
  let Latency = 20;
  let ReleaseAtCycles = [1, 19];
}

// Integer remainder (same costs as the corresponding divide)
def : WriteRes<WriteIRem, [SiFiveP400MulDiv, SiFiveP400Div]> {
  let Latency = 35;
  let ReleaseAtCycles = [1, 34];
}
def : WriteRes<WriteIRem32, [SiFiveP400MulDiv, SiFiveP400Div]> {
  let Latency = 20;
  let ReleaseAtCycles = [1, 19];
}
213
// Single-cycle bit-manipulation operations on any integer queue.
let Latency = 1 in {
// Bitmanip
def : WriteRes<WriteRotateImm, [SiFiveP400IntArith]>;
def : WriteRes<WriteRotateImm32, [SiFiveP400IntArith]>;
def : WriteRes<WriteRotateReg, [SiFiveP400IntArith]>;
def : WriteRes<WriteRotateReg32, [SiFiveP400IntArith]>;

def : WriteRes<WriteCLZ, [SiFiveP400IntArith]>;
def : WriteRes<WriteCLZ32, [SiFiveP400IntArith]>;
def : WriteRes<WriteCTZ, [SiFiveP400IntArith]>;
def : WriteRes<WriteCTZ32, [SiFiveP400IntArith]>;

def : WriteRes<WriteORCB, [SiFiveP400IntArith]>;
def : WriteRes<WriteIMinMax, [SiFiveP400IntArith]>;

def : WriteRes<WriteREV8, [SiFiveP400IntArith]>;

def : WriteRes<WriteSHXADD, [SiFiveP400IntArith]>;
def : WriteRes<WriteSHXADD32, [SiFiveP400IntArith]>;

def : WriteRes<WriteSingleBit, [SiFiveP400IntArith]>;
def : WriteRes<WriteSingleBitImm, [SiFiveP400IntArith]>;
def : WriteRes<WriteBEXT, [SiFiveP400IntArith]>;
def : WriteRes<WriteBEXTI, [SiFiveP400IntArith]>;
}
239
// Memory
// Scalar integer and FP stores: unit latency on the store pipe.
let Latency = 1 in {
def : WriteRes<WriteSTB, [SiFiveP400Store]>;
def : WriteRes<WriteSTH, [SiFiveP400Store]>;
def : WriteRes<WriteSTW, [SiFiveP400Store]>;
def : WriteRes<WriteSTD, [SiFiveP400Store]>;
def : WriteRes<WriteFST16, [SiFiveP400Store]>;
def : WriteRes<WriteFST32, [SiFiveP400Store]>;
def : WriteRes<WriteFST64, [SiFiveP400Store]>;
}
// Integer loads of every width share the 4-cycle cache-access latency.
let Latency = 4 in {
def : WriteRes<WriteLDB, [SiFiveP400Load]>;
def : WriteRes<WriteLDH, [SiFiveP400Load]>;
def : WriteRes<WriteLDW, [SiFiveP400Load]>;
def : WriteRes<WriteLDD, [SiFiveP400Load]>;
}

// Floating-point loads have 5-cycle latency.
let Latency = 5 in {
def : WriteRes<WriteFLD16, [SiFiveP400Load]>;
def : WriteRes<WriteFLD32, [SiFiveP400Load]>;
def : WriteRes<WriteFLD64, [SiFiveP400Load]>;
}
264
// Atomic memory
// Atomic stores use the store pipe; AMOs and atomic loads use the load
// pipe. All have 3-cycle latency.
let Latency = 3 in {
def : WriteRes<WriteAtomicSTW, [SiFiveP400Store]>;
def : WriteRes<WriteAtomicSTD, [SiFiveP400Store]>;
def : WriteRes<WriteAtomicW, [SiFiveP400Load]>;
def : WriteRes<WriteAtomicD, [SiFiveP400Load]>;
def : WriteRes<WriteAtomicLDW, [SiFiveP400Load]>;
def : WriteRes<WriteAtomicLDD, [SiFiveP400Load]>;
}
274
// Floating point
// Add, multiply and fused multiply-add: 4 cycles for every precision.
let Latency = 4 in {
def : WriteRes<WriteFAdd16, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFAdd32, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFAdd64, [SiFiveP400FloatArith]>;

def : WriteRes<WriteFMul16, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFMul32, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFMul64, [SiFiveP400FloatArith]>;

def : WriteRes<WriteFMA16, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFMA32, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFMA64, [SiFiveP400FloatArith]>;
}

// Sign-injection and min/max: 2 cycles.
let Latency = 2 in {
def : WriteRes<WriteFSGNJ16, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFSGNJ32, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFSGNJ64, [SiFiveP400FloatArith]>;

def : WriteRes<WriteFMinMax16, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFMinMax32, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFMinMax64, [SiFiveP400FloatArith]>;
}
299
// FP divide/sqrt: the issue queue is busy one cycle, the non-pipelined
// SiFiveP400FloatDiv unit for the remaining Latency-1 cycles.
// Half precision.
def : WriteRes<WriteFDiv16, [SiFiveP400FEXQ0, SiFiveP400FloatDiv]> {
  let Latency = 19;
  let ReleaseAtCycles = [1, 18];
}
def : WriteRes<WriteFSqrt16, [SiFiveP400FEXQ0, SiFiveP400FloatDiv]> {
  let Latency = 18;
  let ReleaseAtCycles = [1, 17];
}

// Single precision.
def : WriteRes<WriteFDiv32, [SiFiveP400FEXQ0, SiFiveP400FloatDiv]> {
  let Latency = 19;
  let ReleaseAtCycles = [1, 18];
}
def : WriteRes<WriteFSqrt32, [SiFiveP400FEXQ0, SiFiveP400FloatDiv]> {
  let Latency = 18;
  let ReleaseAtCycles = [1, 17];
}

// Double precision
def : WriteRes<WriteFDiv64, [SiFiveP400FEXQ0, SiFiveP400FloatDiv]> {
  let Latency = 33;
  let ReleaseAtCycles = [1, 32];
}
def : WriteRes<WriteFSqrt64, [SiFiveP400FEXQ0, SiFiveP400FloatDiv]> {
  let Latency = 33;
  let ReleaseAtCycles = [1, 32];
}
329
// Conversions
// All 2-cycle: int-to-FP sources issue on the I2F queue (IEXQ2),
// FP-to-int results and compares/classify on the F2I queue (FEXQ0),
// FP-to-FP conversions on the FP arithmetic queue.
let Latency = 2 in {
def : WriteRes<WriteFCvtI32ToF16, [SiFiveP400I2F]>;
def : WriteRes<WriteFCvtI32ToF32, [SiFiveP400I2F]>;
def : WriteRes<WriteFCvtI32ToF64, [SiFiveP400I2F]>;
def : WriteRes<WriteFCvtI64ToF16, [SiFiveP400I2F]>;
def : WriteRes<WriteFCvtI64ToF32, [SiFiveP400I2F]>;
def : WriteRes<WriteFCvtI64ToF64, [SiFiveP400I2F]>;
def : WriteRes<WriteFCvtF16ToI32, [SiFiveP400F2I]>;
def : WriteRes<WriteFCvtF16ToI64, [SiFiveP400F2I]>;
def : WriteRes<WriteFCvtF16ToF32, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFCvtF16ToF64, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFCvtF32ToI32, [SiFiveP400F2I]>;
def : WriteRes<WriteFCvtF32ToI64, [SiFiveP400F2I]>;
def : WriteRes<WriteFCvtF32ToF16, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFCvtF32ToF64, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFCvtF64ToI32, [SiFiveP400F2I]>;
def : WriteRes<WriteFCvtF64ToI64, [SiFiveP400F2I]>;
def : WriteRes<WriteFCvtF64ToF16, [SiFiveP400FloatArith]>;
def : WriteRes<WriteFCvtF64ToF32, [SiFiveP400FloatArith]>;

def : WriteRes<WriteFClass16, [SiFiveP400F2I]>;
def : WriteRes<WriteFClass32, [SiFiveP400F2I]>;
def : WriteRes<WriteFClass64, [SiFiveP400F2I]>;
def : WriteRes<WriteFCmp16, [SiFiveP400F2I]>;
def : WriteRes<WriteFCmp32, [SiFiveP400F2I]>;
def : WriteRes<WriteFCmp64, [SiFiveP400F2I]>;
// FP<->GPR moves follow the same I2F/F2I split.
def : WriteRes<WriteFMovI16ToF16, [SiFiveP400I2F]>;
def : WriteRes<WriteFMovF16ToI16, [SiFiveP400F2I]>;
def : WriteRes<WriteFMovI32ToF32, [SiFiveP400I2F]>;
def : WriteRes<WriteFMovF32ToI32, [SiFiveP400F2I]>;
def : WriteRes<WriteFMovI64ToF64, [SiFiveP400I2F]>;
def : WriteRes<WriteFMovF64ToI64, [SiFiveP400F2I]>;
}
364
// 6. Configuration-Setting Instructions
// vsetvl{i} variants are handled by the system queue (IEXQ1).
def : WriteRes<WriteVSETVLI, [SiFiveP400SYS]>;
def : WriteRes<WriteVSETIVLI, [SiFiveP400SYS]>;
def : WriteRes<WriteVSETVL, [SiFiveP400SYS]>;
369
// 7. Vector Loads and Stores
// FIXME: This unit is still being improved, currently
// it is based on stage numbers. Estimates are optimistic,
// latency may be longer.
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  // Unit-stride, mask and fault-only-first loads: 8-cycle latency,
  // VLD unit busy for LMUL cycles.
  let Latency = 8, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVLDE",    [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDM",    [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDFF",   [SiFiveP400VLD], mx, IsWorstCase>;
  }
  // Strided loads: 4 extra cycles of latency.
  let Latency = 12, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVLDS8",   [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDS16",  [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDS32",  [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDS64",  [SiFiveP400VLD], mx, IsWorstCase>;
  }
  // Indexed (unordered and ordered) loads: same cost as strided.
  let Latency = 12, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVLDUX8",  [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX8",  [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFiveP400VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFiveP400VLD], mx, IsWorstCase>;
  }
}
399
// Vector stores mirror the load costs on the VST unit.
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  // Unit-stride and mask stores.
  let Latency = 8, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSTE",    [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTM",    [SiFiveP400VST], mx, IsWorstCase>;
  }
  // Strided stores.
  let Latency = 12, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSTS8",   [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTS16",  [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTS32",  [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTS64",  [SiFiveP400VST], mx, IsWorstCase>;
  }
  // Indexed (unordered and ordered) stores.
  let Latency = 12, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSTUX8",  [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX8",  [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFiveP400VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFiveP400VST], mx, IsWorstCase>;
  }
}
424
// Segmented loads and stores: the cycle count scales with the VL upper
// bound times NF (see SiFiveP400GetCyclesSegmented).
foreach mx = SchedMxList in {
  foreach nf=2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
      defvar LMulLat = SiFiveP400GetCyclesSegmented<mx, eew, nf>.c;
      // Segmented loads occupy the VLD unit for the full duration.
      let Latency = !add(12, LMulLat), ReleaseAtCycles = [!add(12, LMulLat)] in {
        defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew,   [SiFiveP400VLD], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [SiFiveP400VLD], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew,  [SiFiveP400VLD], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [SiFiveP400VLD], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [SiFiveP400VLD], mx, IsWorstCase>;
      }
      // NOTE(review): store latency uses !add(1, LMulLat) while loads use
      // !add(12, LMulLat), yet ReleaseAtCycles matches the loads' value —
      // confirm this asymmetry is intentional.
      let Latency = !add(1, LMulLat), ReleaseAtCycles = [!add(12, LMulLat)] in {
        defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew,   [SiFiveP400VST], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew,  [SiFiveP400VST], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [SiFiveP400VST], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [SiFiveP400VST], mx, IsWorstCase>;
      }
    }
  }
}
446
// Whole register move/load/store
foreach LMul = [1, 2, 4, 8] in {
  // Whole-register loads/stores hold their unit for LMul cycles.
  let Latency = 8, ReleaseAtCycles = [LMul] in {
    def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SiFiveP400VLD]>;
    def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SiFiveP400VST]>;
  }
  // Whole-register moves run on the vector execution queue.
  let Latency = 2, ReleaseAtCycles = [LMul] in {
    def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SiFiveP400VEXQ0]>;
  }
}
457
// 11. Vector Integer Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  // Simple ALU/compare/merge/move ops: 2-cycle latency, vector queue
  // busy for LMUL cycles.
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVIALUV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIALUX",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIALUI",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVExtV",     [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUX",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUI",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUMV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUMX",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICALUMI",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpX",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpI",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeX",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMergeI",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovX",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMovI",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }

  // Shifts: latency is max(2, LMUL cycles).
  let Latency = !if(!lt(LMulLat, 2), 2, LMulLat), ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVShiftV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVShiftX",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVShiftI",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }

  // Multiplies and multiply-adds: 6 cycles, 9 at M8.
  let Latency = !if(!eq(mx, "M8"), 9, 6), ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVIMulV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulX",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
// Widening
foreach mx = SchedMxListW in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxListW>.c;
  // Widening ALU, multiply and multiply-add: 6-cycle latency.
  let Latency = 6, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVIWALUV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUX",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUI",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulX",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
513
// Worst case needs 51/45/42/72 * lmul cycles for i8/16/32/64.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    // Per-micro-op divide latency, keyed on SEW.
    defvar DivMicroOpLat =
      !cond(!eq(sew, 8): 51, !eq(sew, 16): 45, !eq(sew, 32): 42,
      /* SEW=64 */ true: 72);
    defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
    // The vector divide unit is held for the full divide latency.
    let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFiveP400VEXQ0, SiFiveP400VDiv], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFiveP400VEXQ0, SiFiveP400VDiv], mx, sew, IsWorstCase>;
    }
  }
}
529
// Narrowing Shift and Clips
foreach mx = SchedMxListW in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxListW>.c;
  // 2-cycle latency, vector queue busy LMUL cycles.
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipX",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipI",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
543
// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  // Saturating/averaging ALU, saturating multiply and scaling shifts:
  // 6-cycle latency.
  let Latency = 6, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSALUV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSALUX",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSALUI",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAALUV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAALUX",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSMulV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSMulX",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
561
// 13. Vector Floating-Point Instructions
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    // FP add, multiply and fused multiply-add: 6-cycle latency.
    let Latency = 6, ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV",  [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF",  [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV",  [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF",  [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
    }
  }
}
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    // Reciprocal estimate, sign-injection and min/max: 2-cycle latency.
    let Latency = 2, ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
    }
    // Integer-to-FP conversion: 3-cycle latency.
    let Latency = 3, ReleaseAtCycles = [LMulLat] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
  }
}
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  // FP compares, classify, merges and moves: 2-cycle latency.
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVFCmpV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFCmpF",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFClassV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMergeV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMovV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
  // FP-to-integer conversion: 3-cycle latency.
  let Latency = 3, ReleaseAtCycles = [LMulLat] in
  defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
}
605
// Widening
foreach mx = SchedMxListW in {
  foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
    // Widening int-to-FP conversion: 3-cycle latency.
    let Latency = 3, ReleaseAtCycles = [LMulLat] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxListFW>.c;
  // Widening FP-to-int conversion: 6-cycle latency.
  let Latency = 6, ReleaseAtCycles = [LMulLat] in
  defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    // Widening FP ALU, multiply, multiply-add and FP-to-FP conversion:
    // 6-cycle latency.
    let Latency = 6, ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
    }
  }
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxListW>.c;
  // Narrowing FP-to-int conversion: 3-cycle latency.
  let Latency = 3, ReleaseAtCycles = [LMulLat] in
  defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    // Narrowing int-to-FP and FP-to-FP conversions: 3-cycle latency.
    let Latency = 3, ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
    }
  }
}
653
// Worst case needs around 29/25/37 * LMUL cycles for f16/32/64.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    // Per-micro-op divide/sqrt latency, keyed on SEW.
    defvar DivMicroOpLat =
      !cond(!eq(sew, 16): 29, !eq(sew, 32): 25, /* SEW=64 */ true: 37);
    defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
    // The vector FP divide unit is held for the full latency.
    let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV",  [SiFiveP400VEXQ0, SiFiveP400VFloatDiv], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF",  [SiFiveP400VEXQ0, SiFiveP400VFloatDiv], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFiveP400VEXQ0, SiFiveP400VFloatDiv], mx, sew, IsWorstCase>;
    }
  }
}
669
// 14. Vector Reduction Operations
// Integer reduction costs come from the shared SimpleVIReduceCycles /
// AdvancedVIReduceCycles helpers (defined elsewhere in the scheduling files).
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;

    // Simple reduction
    defvar SimpleC = SimpleVIReduceCycles<mx>;
    let Latency = SimpleC.latency, ReleaseAtCycles = [SimpleC.rthroughput] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From",  [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;

    // Advanced reduction
    defvar AdvancedC = AdvancedVIReduceCycles<sew, mx>;
    let Latency = AdvancedC.latency, ReleaseAtCycles = [AdvancedC.rthroughput] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFiveP400VEXQ0],
                                   mx, sew, IsWorstCase>;
  }
}

// Widening integer reductions use the same simple-reduction cost model.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    defvar SimpleC = SimpleVIReduceCycles<mx>;
    let Latency = SimpleC.latency, ReleaseAtCycles = [SimpleC.rthroughput] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFiveP400VEXQ0],
                                      mx, sew, IsWorstCase>;
    }
  }
}

foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;

    // Simple reduction.
    // Unordered FP reduction: latency grows by 6 cycles per doubling of LMUL
    // (the !logtwo term models the extra depth of the reduction tree).
    defvar BaseC = VFReduceBaseCycles<sew>.val;
    let Latency = !add(BaseC, !mul(6, !logtwo(LMulLat))), ReleaseAtCycles = [BaseC] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFiveP400VEXQ0],
                                   mx, sew, IsWorstCase>;

    // Advanced reduction.
    defvar AdvancedC = AdvancedVFReduceCycles<sew, mx>;
    let Latency = AdvancedC.latency, ReleaseAtCycles = [AdvancedC.rthroughput] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From",
                                   [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;

    // Ordered reduction ("O" in the write name) is serialized, so both the
    // latency and the occupancy scale linearly with LMUL.
    defvar OrderedRedCycles = !mul(BaseC, LMulLat);
    let Latency = OrderedRedCycles, ReleaseAtCycles = [OrderedRedCycles] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFiveP400VEXQ0],
                                    mx, sew, IsWorstCase>;
  }
}

// Widening FP reductions.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    let Latency = !add(6, !mul(6, LMulLat)), ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From",  [SiFiveP400VEXQ0],
                                     mx, sew, IsWorstCase>;
    }

    // Ordered widening reduction: serial, cost linear in LMUL.
    defvar OrderedRedCycles = !mul(VFReduceBaseCycles<sew>.val, LMulLat);
    let Latency = OrderedRedCycles, ReleaseAtCycles = [OrderedRedCycles] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFiveP400VEXQ0],
                                   mx, sew, IsWorstCase>;
  }
}
738
// 15. Vector Mask Instructions
foreach mx = SchedMxList in {
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  // Mask-register logical/query ops have LMUL-independent occupancy ([1]):
  // they operate on a single mask register regardless of LMUL.
  let Latency = 2, ReleaseAtCycles = [1] in {
    defm "" : LMULWriteResMX<"WriteVMALUV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMPopV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  // viota/vid write a full vector register group, so occupancy scales with LMUL.
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVIotaV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIdxV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
754
// 16. Vector Permutation Instructions
// Simple Slide
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  // Immediate slides and single-element (slide1) variants share one cost
  // model: 2-cycle latency, occupancy scaling with LMUL. The original had two
  // adjacent `let` blocks with identical constraints; they are merged here.
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSlideI",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVISlide1X",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFSlide1F",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
// Register-offset slides at LMUL <= 1 are cheap and LMUL-independent.
// NOTE: since M8 (the worst-case LMUL for SchedMxList) is not in this list,
// IsWorstCase is always false here; the worst case is provided by the
// "Complex Slide" loop below.
foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 2, ReleaseAtCycles = [1] in {
    defm "" : LMULWriteResMX<"WriteVSlideUpX",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSlideDownX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
775
// Complex Slide
// Register-offset slides at LMUL > 1. Slide-up costs 8 + LMUL cycles;
// slide-down costs 8 + (3/2)*LMUL cycles (integer division truncates).
foreach mx = ["M2", "M4", "M8"] in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;

  defvar UpLatAndCycles = !add(8, LMulLat);
  let Latency = UpLatAndCycles, ReleaseAtCycles = [UpLatAndCycles] in {
    defm "" : LMULWriteResMX<"WriteVSlideUpX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
  defvar DownLatAndCycles = !add(8, !div(!mul(LMulLat, 3), 2));
  let Latency = DownLatAndCycles, ReleaseAtCycles = [DownLatAndCycles] in {
    defm "" : LMULWriteResMX<"WriteVSlideDownX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
790
// Integer scalar <-> vector element moves (vmv.x.s / vmv.s.x).
let Latency = 2, ReleaseAtCycles = [2] in {
  def : WriteRes<WriteVMovXS, [SiFiveP400VEXQ0]>;
  def : WriteRes<WriteVMovSX, [SiFiveP400VEXQ0]>;
}
// FP scalar <-> vector element moves (vfmv.f.s / vfmv.s.f) have higher latency.
let Latency = 6, ReleaseAtCycles = [2] in {
  def : WriteRes<WriteVMovFS, [SiFiveP400VEXQ0]>;
  def : WriteRes<WriteVMovSF, [SiFiveP400VEXQ0]>;
}
799
// Simple Gather and Compress
// At LMUL <= 1 a register-indexed gather is a single cheap micro-op.
foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 3, ReleaseAtCycles = [1] in {
    defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
807
// Vector-indexed gather and compress at LMUL <= 1: single cheap micro-op.
foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
  foreach sew = SchedSEWSet<mx>.val in {
    // Use the MXSEW worst-case predicate for consistency with the complex
    // gather/compress loop below. Because M8 (the largest LMUL in SchedMxList)
    // is not in this mx list, the predicate is false for every (mx, sew) pair
    // here -- exactly like the MX-only form it replaces.
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = 3, ReleaseAtCycles = [1] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
    }
  }
}
818
// Complex Gather and Compress
// Register-indexed gather at LMUL > 1: occupancy scales with LMUL.
foreach mx = ["M2", "M4", "M8"] in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 6, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
827
// Vector-indexed gather and compress at LMUL > 1: occupancy is
// 2*LMUL + 8 cycles on the vector issue queue.
foreach mx = ["M2", "M4", "M8"] in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP400IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = 6, ReleaseAtCycles = [!add(!mul(LMulLat, 2), 8)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
    }
  }
}
839
// Simple Vrgather.vi
// Immediate-indexed gather is simple at every LMUL: 3-cycle latency,
// occupancy scaling with LMUL.
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 3, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
848
// Vector Crypto
// All vector-crypto writes run on the single vector issue queue with
// occupancy scaling linearly with LMUL (except VSM3C, which has its own
// cycle helper).
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP400GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP400IsWorstCaseMX<mx, SchedMxList>.c;
  // Zvbb
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVBREVV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVCLZV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVCPOPV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVCTZV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVWSLLV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVWSLLX",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVWSLLI",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
  // Zvbc
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVCLMULV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVCLMULX", [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
  // Zvkb
  // VANDN uses WriteVIALU[V|X|I]
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVBREV8V",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVREV8V",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVRotV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVRotX",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVRotI",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
  // Zvkg
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVGHSHV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVGMULV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
  // ZvknhaOrZvknhb
  let Latency = 3, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    // SHA-2 message schedule only operates on SEW=32/64, so drop 8 and 16.
    defvar ZvknhSEWs = !listremove(SchedSEWSet<mx>.val, [8, 16]);
    // Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending
    // order.
    defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr);
    foreach sew = ZvknhSEWs in {
      // The worst case for Zvknh[ab] is designated to the largest SEW and LMUL.
      defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW));
      defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew,
                                     IsWorstCaseVSHA2MSV>;
    }
  }
  // Zvkned
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVAESMVV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAESKF1V", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAESKF2V", [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAESZV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
  // Zvksed
  let Latency = 3, ReleaseAtCycles = [SiFiveP400VSM3CCycles<mx>.c] in
  defm "" : LMULWriteResMX<"WriteVSM3CV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
  let Latency = 6, ReleaseAtCycles = [LMulLat] in
  defm "" : LMULWriteResMX<"WriteVSM3MEV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
  let Latency = 3, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSM4KV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSM4RV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
  }
}
914
// Others
def : WriteRes<WriteCSR, [SiFiveP400SYS]>;
// Nops consume no resources.
def : WriteRes<WriteNop, []>;
def : WriteRes<WriteRdVLENB, [SiFiveP400SYS]>;


// FIXME: This could be better modeled by looking at the regclasses of the operands.
// For now, treat every COPY as a plain integer ALU op.
def : InstRW<[WriteIALU, ReadIALU], (instrs COPY)>;
923
924//===----------------------------------------------------------------------===//
925// Bypass and advance
926def : ReadAdvance<ReadJmp, 0>;
927def : ReadAdvance<ReadJalr, 0>;
928def : ReadAdvance<ReadCSR, 0>;
929def : ReadAdvance<ReadStoreData, 0>;
930def : ReadAdvance<ReadMemBase, 0>;
931def : ReadAdvance<ReadIALU, 0>;
932def : ReadAdvance<ReadIALU32, 0>;
933def : ReadAdvance<ReadShiftImm, 0>;
934def : ReadAdvance<ReadShiftImm32, 0>;
935def : ReadAdvance<ReadShiftReg, 0>;
936def : ReadAdvance<ReadShiftReg32, 0>;
937def : ReadAdvance<ReadIDiv, 0>;
938def : ReadAdvance<ReadIDiv32, 0>;
939def : ReadAdvance<ReadIRem, 0>;
940def : ReadAdvance<ReadIRem32, 0>;
941def : ReadAdvance<ReadIMul, 0>;
942def : ReadAdvance<ReadIMul32, 0>;
943def : ReadAdvance<ReadAtomicWA, 0>;
944def : ReadAdvance<ReadAtomicWD, 0>;
945def : ReadAdvance<ReadAtomicDA, 0>;
946def : ReadAdvance<ReadAtomicDD, 0>;
947def : ReadAdvance<ReadAtomicLDW, 0>;
948def : ReadAdvance<ReadAtomicLDD, 0>;
949def : ReadAdvance<ReadAtomicSTW, 0>;
950def : ReadAdvance<ReadAtomicSTD, 0>;
951def : ReadAdvance<ReadFStoreData, 0>;
952def : ReadAdvance<ReadFMemBase, 0>;
953def : ReadAdvance<ReadFAdd16, 0>;
954def : ReadAdvance<ReadFAdd32, 0>;
955def : ReadAdvance<ReadFAdd64, 0>;
956def : ReadAdvance<ReadFMul16, 0>;
957def : ReadAdvance<ReadFMA16, 0>;
958def : ReadAdvance<ReadFMA16Addend, 0>;
959def : ReadAdvance<ReadFMul32, 0>;
960def : ReadAdvance<ReadFMA32, 0>;
961def : ReadAdvance<ReadFMA32Addend, 0>;
962def : ReadAdvance<ReadFMul64, 0>;
963def : ReadAdvance<ReadFMA64, 0>;
964def : ReadAdvance<ReadFMA64Addend, 0>;
965def : ReadAdvance<ReadFDiv16, 0>;
966def : ReadAdvance<ReadFDiv32, 0>;
967def : ReadAdvance<ReadFDiv64, 0>;
968def : ReadAdvance<ReadFSqrt16, 0>;
969def : ReadAdvance<ReadFSqrt32, 0>;
970def : ReadAdvance<ReadFSqrt64, 0>;
971def : ReadAdvance<ReadFCmp16, 0>;
972def : ReadAdvance<ReadFCmp32, 0>;
973def : ReadAdvance<ReadFCmp64, 0>;
974def : ReadAdvance<ReadFSGNJ16, 0>;
975def : ReadAdvance<ReadFSGNJ32, 0>;
976def : ReadAdvance<ReadFSGNJ64, 0>;
977def : ReadAdvance<ReadFMinMax16, 0>;
978def : ReadAdvance<ReadFMinMax32, 0>;
979def : ReadAdvance<ReadFMinMax64, 0>;
980def : ReadAdvance<ReadFCvtF16ToI32, 0>;
981def : ReadAdvance<ReadFCvtF16ToI64, 0>;
982def : ReadAdvance<ReadFCvtF32ToI32, 0>;
983def : ReadAdvance<ReadFCvtF32ToI64, 0>;
984def : ReadAdvance<ReadFCvtF64ToI32, 0>;
985def : ReadAdvance<ReadFCvtF64ToI64, 0>;
986def : ReadAdvance<ReadFCvtI32ToF16, 0>;
987def : ReadAdvance<ReadFCvtI32ToF32, 0>;
988def : ReadAdvance<ReadFCvtI32ToF64, 0>;
989def : ReadAdvance<ReadFCvtI64ToF16, 0>;
990def : ReadAdvance<ReadFCvtI64ToF32, 0>;
991def : ReadAdvance<ReadFCvtI64ToF64, 0>;
992def : ReadAdvance<ReadFCvtF32ToF64, 0>;
993def : ReadAdvance<ReadFCvtF64ToF32, 0>;
994def : ReadAdvance<ReadFCvtF16ToF32, 0>;
995def : ReadAdvance<ReadFCvtF32ToF16, 0>;
996def : ReadAdvance<ReadFCvtF16ToF64, 0>;
997def : ReadAdvance<ReadFCvtF64ToF16, 0>;
998def : ReadAdvance<ReadFMovF16ToI16, 0>;
999def : ReadAdvance<ReadFMovI16ToF16, 0>;
1000def : ReadAdvance<ReadFMovF32ToI32, 0>;
1001def : ReadAdvance<ReadFMovI32ToF32, 0>;
1002def : ReadAdvance<ReadFMovF64ToI64, 0>;
1003def : ReadAdvance<ReadFMovI64ToF64, 0>;
1004def : ReadAdvance<ReadFClass16, 0>;
1005def : ReadAdvance<ReadFClass32, 0>;
1006def : ReadAdvance<ReadFClass64, 0>;
1007
// Bitmanip
// No operand bypass is modeled for the bit-manipulation extensions either.
def : ReadAdvance<ReadRotateImm, 0>;
def : ReadAdvance<ReadRotateImm32, 0>;
def : ReadAdvance<ReadRotateReg, 0>;
def : ReadAdvance<ReadRotateReg32, 0>;
def : ReadAdvance<ReadCLZ, 0>;
def : ReadAdvance<ReadCLZ32, 0>;
def : ReadAdvance<ReadCTZ, 0>;
def : ReadAdvance<ReadCTZ32, 0>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
def : ReadAdvance<ReadORCB, 0>;
def : ReadAdvance<ReadIMinMax, 0>;
def : ReadAdvance<ReadREV8, 0>;
def : ReadAdvance<ReadSHXADD, 0>;
def : ReadAdvance<ReadSHXADD32, 0>;
def : ReadAdvance<ReadSingleBit, 0>;
def : ReadAdvance<ReadSingleBitImm, 0>;
1026
// 6. Configuration-Setting Instructions
def : ReadAdvance<ReadVSETVLI, 0>;
def : ReadAdvance<ReadVSETVL, 0>;

// 7. Vector Loads and Stores
// The LMULReadAdvance multiclass expands a SchedRead name into its per-LMUL
// variants, each with the given advance (0 = no bypass).
def : ReadAdvance<ReadVLDX, 0>;
def : ReadAdvance<ReadVSTX, 0>;
defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
defm "" : LMULReadAdvance<"ReadVSTM", 0>;
def : ReadAdvance<ReadVLDSX, 0>;
def : ReadAdvance<ReadVSTSX, 0>;
defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
// LMUL Aware
def : ReadAdvance<ReadVST1R, 0>;
def : ReadAdvance<ReadVST2R, 0>;
def : ReadAdvance<ReadVST4R, 0>;
def : ReadAdvance<ReadVST8R, 0>;
1067
// 12. Vector Integer Arithmetic Instructions
defm : LMULReadAdvance<"ReadVIALUV", 0>;
defm : LMULReadAdvance<"ReadVIALUX", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
defm : LMULReadAdvance<"ReadVExtV", 0>;
defm : LMULReadAdvance<"ReadVICALUV", 0>;
defm : LMULReadAdvance<"ReadVICALUX", 0>;
defm : LMULReadAdvance<"ReadVShiftV", 0>;
defm : LMULReadAdvance<"ReadVShiftX", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
defm : LMULReadAdvance<"ReadVICmpV", 0>;
defm : LMULReadAdvance<"ReadVICmpX", 0>;
defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
defm : LMULReadAdvance<"ReadVIMulV", 0>;
defm : LMULReadAdvance<"ReadVIMulX", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
defm : LMULReadAdvance<"ReadVIMergeV", 0>;
defm : LMULReadAdvance<"ReadVIMergeX", 0>;
defm : LMULReadAdvance<"ReadVIMovV", 0>;
defm : LMULReadAdvance<"ReadVIMovX", 0>;

// 13. Vector Fixed-Point Arithmetic Instructions
defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
1110
// 14. Vector Floating-Point Instructions
defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;

// 15. Vector Reduction Operations
// Reduction reads are not LMUL-expanded: a single ReadAdvance per SchedRead.
def : ReadAdvance<ReadVIRedV, 0>;
def : ReadAdvance<ReadVIRedV0, 0>;
def : ReadAdvance<ReadVIWRedV, 0>;
def : ReadAdvance<ReadVIWRedV0, 0>;
def : ReadAdvance<ReadVFRedV, 0>;
def : ReadAdvance<ReadVFRedV0, 0>;
def : ReadAdvance<ReadVFRedOV, 0>;
def : ReadAdvance<ReadVFRedOV0, 0>;
def : ReadAdvance<ReadVFWRedV, 0>;
def : ReadAdvance<ReadVFWRedV0, 0>;
def : ReadAdvance<ReadVFWRedOV, 0>;
def : ReadAdvance<ReadVFWRedOV0, 0>;
1160
// 16. Vector Mask Instructions
defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
defm "" : LMULReadAdvance<"ReadVIotaV", 0>;

// 17. Vector Permutation Instructions
def : ReadAdvance<ReadVMovXS, 0>;
def : ReadAdvance<ReadVMovSX_V, 0>;
def : ReadAdvance<ReadVMovSX_X, 0>;
def : ReadAdvance<ReadVMovFS, 0>;
def : ReadAdvance<ReadVMovSF_V, 0>;
def : ReadAdvance<ReadVMovSF_F, 0>;
defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// LMUL Aware
def : ReadAdvance<ReadVMov1V, 0>;
def : ReadAdvance<ReadVMov2V, 0>;
def : ReadAdvance<ReadVMov4V, 0>;
def : ReadAdvance<ReadVMov8V, 0>;

// Others
def : ReadAdvance<ReadVMask, 0>;
def : ReadAdvance<ReadVPassthru_WorstCase, 0>;
// Passthru (merge/tail) operand reads exist per-LMUL and per-(LMUL, SEW);
// cover every variant explicitly.
foreach mx = SchedMxList in {
  def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx), 0>;
  foreach sew = SchedSEWSet<mx>.val in
    def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx  # "_E" # sew), 0>;
}
1201
// Vector Crypto Extensions
// Zvbb
defm "" : LMULReadAdvance<"ReadVBREVV", 0>;
defm "" : LMULReadAdvance<"ReadVCLZV", 0>;
defm "" : LMULReadAdvance<"ReadVCPOPV", 0>;
defm "" : LMULReadAdvance<"ReadVCTZV", 0>;
defm "" : LMULReadAdvance<"ReadVWSLLV", 0>;
defm "" : LMULReadAdvance<"ReadVWSLLX", 0>;
// Zvbc
defm "" : LMULReadAdvance<"ReadVCLMULV", 0>;
defm "" : LMULReadAdvance<"ReadVCLMULX", 0>;
// Zvkb
// VANDN uses ReadVIALU[V|X|I]
defm "" : LMULReadAdvance<"ReadVBREV8V", 0>;
defm "" : LMULReadAdvance<"ReadVREV8V", 0>;
defm "" : LMULReadAdvance<"ReadVRotV", 0>;
defm "" : LMULReadAdvance<"ReadVRotX", 0>;
// Zvkg
defm "" : LMULReadAdvance<"ReadVGHSHV", 0>;
defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
// Zvknha or Zvknhb
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
// Zvkned
defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;
defm "" : LMULReadAdvance<"ReadVAESKF2V", 0>;
defm "" : LMULReadAdvance<"ReadVAESZV", 0>;
// Zvksed
defm "" : LMULReadAdvance<"ReadVSM4KV", 0>;
defm "" : LMULReadAdvance<"ReadVSM4RV", 0>;
// Zvksh (vsm3c/vsm3me are the vector SM3 instructions; the previous comment
// said "Zbksh", which is the scalar SM3 extension)
defm "" : LMULReadAdvance<"ReadVSM3CV", 0>;
defm "" : LMULReadAdvance<"ReadVSM3MEV", 0>;
1237
1238//===----------------------------------------------------------------------===//
1239// Unsupported extensions
1240defm : UnsupportedSchedZabha;
1241defm : UnsupportedSchedZbc;
1242defm : UnsupportedSchedZbkb;
1243defm : UnsupportedSchedZbkx;
1244defm : UnsupportedSchedSFB;
1245defm : UnsupportedSchedZfa;
1246defm : UnsupportedSchedXsfvcp;
1247}
1248