xref: /llvm-project/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td (revision 5a4945fa4d515b3209a5e181621bf828e678769f)
1//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10
/// c is true if mx is the worst-case LMUL among the LMULs in MxList.
/// On the SiFive7, the worst case LMUL is the largest LMUL, i.e. the value
/// produced by LargestLMUL<MxList>.
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}
18
/// c is true if the (mx, sew) pair has the worst case behavior compared to
/// the LMULs in MxList. On the SiFive7, that is the largest LMUL in MxList
/// combined with the smallest SEW for that LMUL (SmallestSEW<mx, isF>).
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                               bit isF = 0> {
  defvar IsLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
  defvar IsSmallestSEW = !eq(sew, SmallestSEW<mx, isF>.r);
  bit c = !and(IsLargestLMUL, IsSmallestSEW);
}
28
/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
/// Every fractional LMUL (MF2, MF4, MF8) is charged a single cycle.
class SiFive7GetCyclesDefault<string mx> {
  defvar IsFractional = !or(!eq(mx, "MF2"), !eq(mx, "MF4"), !eq(mx, "MF8"));
  int c = !cond(
    IsFractional  : 1,
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16
  );
}
42
/// Cycle count used for narrowing operations, indexed by LMUL.
/// The table is 4 * LMUL for MF2 and larger, and 1 for MF4 and MF8.
/// There is deliberately no "M8" entry here; instantiating this class with
/// "M8" leaves the !cond without a matching case.
class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !or(!eq(mx, "MF8"), !eq(mx, "MF4")) : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1")  : 4,
    !eq(mx, "M2")  : 8,
    !eq(mx, "M4")  : 16
  );
}
53
/// Cycle count for mask operations, indexed by LMUL: two cycles at M8 and
/// one cycle for every other LMUL listed below.
class SiFive7GetCyclesVMask<string mx> {
  defvar IsSingleCycle = !or(!eq(mx, "MF8"), !eq(mx, "MF4"), !eq(mx, "MF2"),
                             !eq(mx, "M1"), !eq(mx, "M2"), !eq(mx, "M4"));
  int c = !cond(
    !eq(mx, "M8") : 2,
    IsSingleCycle : 1
  );
}
65
/// VLDM and VSTM can't read/write more than 2 DLENs of data:
/// 2 DLENs when LMUL=8, 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !if(!eq(mx, "M8"), 2, 1);
}
74
// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL. MF4 and MF8 are charged one
// cycle.
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 2,
    !eq(mx, "M1")  : 4,
    !eq(mx, "M2")  : 8,
    !eq(mx, "M4")  : 16,
    !eq(mx, "M8")  : 32
  );
}
88
// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
// vl is taken at its upper bound, (VLEN * LMUL) / SEW.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  defvar DLEN = 256;
  // VLEN * LMUL: number of bits in the register group selected by mx.
  defvar GroupBits = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1")  : VLEN,
    !eq(mx, "M2")  : !mul(VLEN, 2),
    !eq(mx, "M4")  : !mul(VLEN, 4),
    !eq(mx, "M8")  : !mul(VLEN, 8)
  );
  defvar VLUpperBound = !div(GroupBits, sew);
  defvar SegmentBits = !mul(sew, nf);
  // ceil(SegmentBits / DLEN), computed as (SegmentBits + DLEN - 1) / DLEN.
  defvar BeatsPerSegment = !div(!add(SegmentBits, !sub(DLEN, 1)), DLEN);
  int c = !mul(VLUpperBound, BeatsPerSegment);
}
109
/// Cycle count for operations that process one element per cycle:
/// c = (512 / SEW) * LMUL.
class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  //
  // Despite its name, the VLEN field below holds 512 / SEW, i.e. the number
  // of SEW-sized elements in one 512-bit register.
  //
  // Note: c >= 1 since the smallest value of that field is 512 / 64 = 8, and
  // the largest division performed on it is in the MF8 case with division
  // by 8. Therefore, there is no need to ceil the result.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1")  : VLEN,
    !eq(mx, "M2")  : !mul(VLEN, 2),
    !eq(mx, "M4")  : !mul(VLEN, 4),
    !eq(mx, "M8")  : !mul(VLEN, 8)
  );
}
129
/// Per-SEW multiplier applied to divide (and, per the name, square root)
/// cycle counts.
class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !or(!eq(sew, 8), !eq(sew, 16)) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}
139
/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW. Fractional LMULs are charged 1.
  defvar TwoTimesLMUL = !cond(
    !eq(mx, "MF8") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF2") : 1,
    !eq(mx, "M1")  : 2,
    !eq(mx, "M2")  : 4,
    !eq(mx, "M4")  : 8,
    !eq(mx, "M8")  : 16
  );
  // log2 of the number of SEW-sized elements in one DLEN.
  defvar LogElemsPerDLEN = !logtwo(!div(DLEN, sew));
  int c = !add(TwoTimesLMUL, !mul(5, !add(4, LogElemsPerDLEN)));
}
161
/// Cycles for ordered reductions take approximately 6*VL cycles, with VL
/// taken at its upper bound (VLEN * LMUL) / SEW.
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // VLEN * LMUL: number of bits in the register group selected by mx.
  defvar GroupBits = !cond(
    !eq(mx, "MF8") : !div(VLEN, 8),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "M1")  : VLEN,
    !eq(mx, "M2")  : !mul(VLEN, 2),
    !eq(mx, "M4")  : !mul(VLEN, 4),
    !eq(mx, "M8")  : !mul(VLEN, 8)
  );
  defvar VLUpperBound = !div(GroupBits, sew);
  int c = !mul(6, VLUpperBound);
}
177
/// ReadAdvance of `cycles` (default 2) for `read`, valid when the value is
/// produced by any of the scalar integer writes listed below.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, [
        // Base integer ALU and shifts.
        WriteIALU, WriteIALU32,
        WriteShiftImm, WriteShiftImm32,
        WriteShiftReg, WriteShiftReg32,
        // Bit-manipulation.
        WriteSHXADD, WriteSHXADD32,
        WriteRotateImm, WriteRotateImm32,
        WriteRotateReg, WriteRotateReg32,
        WriteSingleBit, WriteSingleBitImm,
        WriteBEXT, WriteBEXTI,
        WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
        WriteCPOP, WriteCPOP32,
        WriteREV8, WriteORCB, WriteIMinMax, WriteSFB,
        // Multiply, divide and remainder.
        WriteIMul, WriteIMul32,
        WriteIDiv, WriteIDiv32,
        WriteIRem, WriteIRem32,
        // Integer loads.
        WriteLDB, WriteLDH, WriteLDW, WriteLDD
      ]>;
194
// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  // Pipeline shape.
  let IssueWidth = 2;        // 2 micro-ops are dispatched per cycle.
  let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.

  // Generic latencies and penalties.
  let LoadLatency = 3;
  let MispredictPenalty = 3;

  // Model options.
  let CompleteModel = 0;
  let EnableIntervals = true;

  // Extensions this model declares as unsupported.
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                             HasStdExtZkr];
}
208
209// The SiFive7 microarchitecture has three pipelines: A, B, V.
210// Pipe A can handle memory, integer alu and vector operations.
211// Pipe B can handle integer alu, control flow, integer multiply and divide,
212// and floating point computation.
213// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
214let SchedModel = SiFive7Model in {
// Every processor resource below is declared with BufferSize = 0.
def SiFive7PipeA : ProcResource<1> { let BufferSize = 0; }
def SiFive7PipeB : ProcResource<1> { let BufferSize = 0; }
// Int Division
def SiFive7IDiv  : ProcResource<1> { let BufferSize = 0; }
// FP Division/Sqrt
def SiFive7FDiv  : ProcResource<1> { let BufferSize = 0; }
// Arithmetic sequencer
def SiFive7VA    : ProcResource<1> { let BufferSize = 0; }
// Load sequencer
def SiFive7VL    : ProcResource<1> { let BufferSize = 0; }
// Store sequencer
def SiFive7VS    : ProcResource<1> { let BufferSize = 0; }
// Vector Command Queue.
// The VCQ accepts instructions from the A Pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one instruction
// per cycle, in order, as soon as the sequencer for that type of instruction is
// available. This resource is meant to be used for 1 cycle by all vector
// instructions, to model that only one vector instruction may be dequeued at a
// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
// VS sequencer resources above. Each of them will only accept a single
// instruction at a time and remain busy for the number of cycles associated
// with that instruction.
def SiFive7VCQ   : ProcResource<1> { let BufferSize = 0; }
234
// Resource group for work that either Pipe A or Pipe B may take.
def SiFive7PipeAB
    : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
236
// Branching: jumps and calls use Pipe B with a latency of 3.
foreach w = [WriteJmp, WriteJal, WriteJalr] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 3; }
243
// Short forward branch: two micro-ops, occupying both Pipe A and Pipe B.
let Latency = 3, NumMicroOps = 2 in
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]>;
249
// Integer arithmetic and logic: either pipe, latency 3.
foreach w = [WriteIALU, WriteIALU32,
             WriteShiftImm, WriteShiftImm32,
             WriteShiftReg, WriteShiftReg32] in
  def : WriteRes<w, [SiFive7PipeAB]> { let Latency = 3; }
259
// Integer multiplication: Pipe B only, latency 3.
foreach w = [WriteIMul, WriteIMul32] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 3; }
265
// Integer division: one cycle on Pipe B, with the divider (SiFive7IDiv) held
// for the rest of the latency.
let Latency = 66, ReleaseAtCycles = [1, 65] in
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]>;
let Latency = 34, ReleaseAtCycles = [1, 33] in
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]>;
275
// Integer remainder: same resources and timing as integer division.
let Latency = 66, ReleaseAtCycles = [1, 65] in
def : WriteRes<WriteIRem, [SiFive7PipeB, SiFive7IDiv]>;
let Latency = 34, ReleaseAtCycles = [1, 33] in
def : WriteRes<WriteIRem32, [SiFive7PipeB, SiFive7IDiv]>;
285
// Bitmanip: everything here has latency 3.
let Latency = 3 in {
// Pipe B only:
//  - Rotates are in the late-B ALU.
//  - clz[w]/ctz[w] are in the late-B ALU.
//  - cpop[w] look exactly like multiply.
//  - orc.b is in the late-B ALU.
//  - min/max are in the late-B ALU.
foreach w = [WriteRotateImm, WriteRotateImm32,
             WriteRotateReg, WriteRotateReg32,
             WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
             WriteCPOP, WriteCPOP32,
             WriteORCB,
             WriteIMinMax] in
  def : WriteRes<w, [SiFive7PipeB]>;

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;

// shNadd[.uw] is on the early-B and late-B ALUs.
foreach w = [WriteSHXADD, WriteSHXADD32] in
  def : WriteRes<w, [SiFive7PipeB]>;
}
317
// Single-bit instructions
// BEXT[I] instruction is available on all ALUs and the other instructions
// are only available on the SiFive7B pipe.
foreach w = [WriteSingleBit, WriteSingleBitImm] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 3; }
foreach w = [WriteBEXT, WriteBEXTI] in
  def : WriteRes<w, [SiFive7PipeAB]> { let Latency = 3; }
327
// Memory: all scalar loads and stores go through Pipe A.
// Stores (integer and FP) keep the WriteRes default latency.
foreach w = [WriteSTB, WriteSTH, WriteSTW, WriteSTD,
             WriteFST16, WriteFST32, WriteFST64] in
  def : WriteRes<w, [SiFive7PipeA]>;

// Integer loads.
foreach w = [WriteLDB, WriteLDH, WriteLDW, WriteLDD] in
  def : WriteRes<w, [SiFive7PipeA]> { let Latency = 3; }

// Floating-point loads.
foreach w = [WriteFLD16, WriteFLD32, WriteFLD64] in
  def : WriteRes<w, [SiFive7PipeA]> { let Latency = 2; }
349
// Atomic memory: Pipe A. Atomic stores keep the WriteRes default latency;
// the remaining atomic writes use latency 3.
foreach w = [WriteAtomicSTW, WriteAtomicSTD] in
  def : WriteRes<w, [SiFive7PipeA]>;

foreach w = [WriteAtomicW, WriteAtomicD,
             WriteAtomicLDW, WriteAtomicLDD] in
  def : WriteRes<w, [SiFive7PipeA]> { let Latency = 3; }
360
// Half precision.
foreach w = [WriteFAdd16, WriteFMul16, WriteFMA16] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 5; }

foreach w = [WriteFSGNJ16, WriteFMinMax16] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 3; }

// Divide and square root additionally hold the FP divider (SiFive7FDiv).
foreach w = [WriteFDiv16, WriteFSqrt16] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]> {
    let Latency = 14;
    let ReleaseAtCycles = [1, 13];
  }
376
// Single precision.
foreach w = [WriteFAdd32, WriteFMul32, WriteFMA32] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 5; }

foreach w = [WriteFSGNJ32, WriteFMinMax32] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 3; }

// Divide and square root additionally hold the FP divider (SiFive7FDiv).
foreach w = [WriteFDiv32, WriteFSqrt32] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]> {
    let Latency = 27;
    let ReleaseAtCycles = [1, 26];
  }
392
// Double precision
foreach w = [WriteFAdd64, WriteFMul64, WriteFMA64] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 7; }

foreach w = [WriteFSGNJ64, WriteFMinMax64] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 3; }

// Divide and square root additionally hold the FP divider (SiFive7FDiv).
foreach w = [WriteFDiv64, WriteFSqrt64] in
  def : WriteRes<w, [SiFive7PipeB, SiFive7FDiv]> {
    let Latency = 56;
    let ReleaseAtCycles = [1, 55];
  }
408
// Conversions, followed by classify, compare and FP<->integer moves.
// All of them use Pipe B with latency 3.
foreach w = [
    // Integer -> FP
    WriteFCvtI32ToF16, WriteFCvtI32ToF32, WriteFCvtI32ToF64,
    WriteFCvtI64ToF16, WriteFCvtI64ToF32, WriteFCvtI64ToF64,
    // From half precision
    WriteFCvtF16ToI32, WriteFCvtF16ToI64,
    WriteFCvtF16ToF32, WriteFCvtF16ToF64,
    // From single precision
    WriteFCvtF32ToI32, WriteFCvtF32ToI64,
    WriteFCvtF32ToF16, WriteFCvtF32ToF64,
    // From double precision
    WriteFCvtF64ToI32, WriteFCvtF64ToI64,
    WriteFCvtF64ToF16, WriteFCvtF64ToF32,
    // Classify
    WriteFClass16, WriteFClass32, WriteFClass64,
    // Compare
    WriteFCmp16, WriteFCmp32, WriteFCmp64,
    // Moves between FP and integer registers
    WriteFMovI16ToF16, WriteFMovF16ToI16,
    WriteFMovI32ToF32, WriteFMovF32ToI32,
    WriteFMovI64ToF64, WriteFMovF64ToI64
  ] in
  def : WriteRes<w, [SiFive7PipeB]> { let Latency = 3; }
443
// 6. Configuration-Setting Instructions
// All vsetvl variants use Pipe A with latency 3.
foreach w = [WriteVSETVLI, WriteVSETIVLI, WriteVSETVL] in
  def : WriteRes<w, [SiFive7PipeA]> { let Latency = 3; }
450
// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar DLENParts = SiFive7GetCyclesDefault<mx>.c;
  // First resource (VCQ): cycles [0, 1). Second resource (the load or store
  // sequencer): acquired at cycle 1 and released at cycle 1 + DLENParts.
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DLENParts)] in {
    let Latency = 4 in
    foreach w = ["WriteVLDE", "WriteVLDFF"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VL], mx, IsWorst>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, IsWorst>;
  }
}
464
// Mask loads and stores (VLDM/VSTM): sequencer occupancy comes from
// SiFive7GetMaskLoadStoreCycles rather than from the default 2 * LMUL.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar MaskCycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, MaskCycles)] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, IsWorst>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, IsWorst>;
  }
}
473
// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.

// EEW = 8.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
  defvar ElemLatency = !add(3, ElemCycles);
  defvar AcqAt = [0, 1];
  defvar ElemRelAt = [1, !add(1, ElemCycles)];
  // Occupancy used by the VLDSX0Pred variant: the default 2 * LMUL cycles.
  defvar X0RelAt = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)];
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred,
                                       [SiFive7VCQ, SiFive7VL],
                                       4, AcqAt, X0RelAt,
                                       ElemLatency, AcqAt, ElemRelAt,
                                       mx, IsWorst>;
  let AcquireAtCycles = AcqAt, ReleaseAtCycles = ElemRelAt in {
    let Latency = ElemLatency in
    foreach w = ["WriteVLDUX8", "WriteVLDOX8"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VL], mx, IsWorst>;
    let Latency = 1 in
    foreach w = ["WriteVSTS8", "WriteVSTUX8", "WriteVSTOX8"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VS], mx, IsWorst>;
  }
}
// TODO: The MxLists need to be filtered by EEW. We only need to support
// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
// since LMUL >= 16/64.
// Strided and indexed loads/stores, EEW = 16.
foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
  defvar ElemLatency = !add(3, ElemCycles);
  defvar AcqAt = [0, 1];
  defvar ElemRelAt = [1, !add(1, ElemCycles)];
  // Occupancy used by the VLDSX0Pred variant: the default 2 * LMUL cycles.
  defvar X0RelAt = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)];
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred,
                                       [SiFive7VCQ, SiFive7VL],
                                       4, AcqAt, X0RelAt,
                                       ElemLatency, AcqAt, ElemRelAt,
                                       mx, IsWorst>;
  let AcquireAtCycles = AcqAt, ReleaseAtCycles = ElemRelAt in {
    let Latency = ElemLatency in
    foreach w = ["WriteVLDUX16", "WriteVLDOX16"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VL], mx, IsWorst>;
    let Latency = 1 in
    foreach w = ["WriteVSTS16", "WriteVSTUX16", "WriteVSTOX16"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VS], mx, IsWorst>;
  }
}
// Strided and indexed loads/stores, EEW = 32 (MF2 is the smallest LMUL
// listed for this EEW).
foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
  defvar ElemLatency = !add(3, ElemCycles);
  defvar AcqAt = [0, 1];
  defvar ElemRelAt = [1, !add(1, ElemCycles)];
  // Occupancy used by the VLDSX0Pred variant: the default 2 * LMUL cycles.
  defvar X0RelAt = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)];
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred,
                                       [SiFive7VCQ, SiFive7VL],
                                       4, AcqAt, X0RelAt,
                                       ElemLatency, AcqAt, ElemRelAt,
                                       mx, IsWorst>;
  let AcquireAtCycles = AcqAt, ReleaseAtCycles = ElemRelAt in {
    let Latency = ElemLatency in
    foreach w = ["WriteVLDUX32", "WriteVLDOX32"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VL], mx, IsWorst>;
    let Latency = 1 in
    foreach w = ["WriteVSTS32", "WriteVSTUX32", "WriteVSTOX32"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VS], mx, IsWorst>;
  }
}
// Strided and indexed loads/stores, EEW = 64 (no fractional LMULs are
// listed for this EEW).
foreach mx = ["M1", "M2", "M4", "M8"] in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
  defvar ElemLatency = !add(3, ElemCycles);
  defvar AcqAt = [0, 1];
  defvar ElemRelAt = [1, !add(1, ElemCycles)];
  // Occupancy used by the VLDSX0Pred variant: the default 2 * LMUL cycles.
  defvar X0RelAt = [1, !add(1, SiFive7GetCyclesDefault<mx>.c)];
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred,
                                       [SiFive7VCQ, SiFive7VL],
                                       4, AcqAt, X0RelAt,
                                       ElemLatency, AcqAt, ElemRelAt,
                                       mx, IsWorst>;
  let AcquireAtCycles = AcqAt, ReleaseAtCycles = ElemRelAt in {
    let Latency = ElemLatency in
    foreach w = ["WriteVLDUX64", "WriteVLDOX64"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VL], mx, IsWorst>;
    let Latency = 1 in
    foreach w = ["WriteVSTS64", "WriteVSTUX64", "WriteVSTOX64"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VS], mx, IsWorst>;
  }
}
553
// Whole-register loads and stores. The sequencer is released at
// 1 + 2 * <number of registers>, matching the default 2 * LMUL occupancy.
let AcquireAtCycles = [0, 1] in {
// VLD*R is LMUL aware
let Latency = 4 in {
  let ReleaseAtCycles = [1, !add(1, 2)] in
  def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>;
  let ReleaseAtCycles = [1, !add(1, 4)] in
  def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>;
  let ReleaseAtCycles = [1, !add(1, 8)] in
  def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>;
  let ReleaseAtCycles = [1, !add(1, 16)] in
  def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>;
}
// VST*R is LMUL aware
let Latency = 1 in {
  let ReleaseAtCycles = [1, !add(1, 2)] in
  def : WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>;
  let ReleaseAtCycles = [1, !add(1, 4)] in
  def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>;
  let ReleaseAtCycles = [1, !add(1, 8)] in
  def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>;
  let ReleaseAtCycles = [1, !add(1, 16)] in
  def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>;
}
}
572
// Segmented Loads and Stores
// Unit-stride segmented loads and stores are effectively converted into strided
// segment loads and stores. Strided segment loads and stores operate at up to
// one segment per cycle if the segment fits within one aligned memory beat.
// Indexed segment loads and stores operate at the same rate as strided ones,
// but they stall the machine until all addresses have been generated.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  foreach eew = [8, 16, 32, 64] in {
    foreach nf = 2-8 in {
      // nf=2 uses the dedicated 4 * LMUL table; nf=3..8 use the generic
      // vl * ceil((SEW * nf) / DLEN) formula.
      defvar SegCycles = !if(!eq(nf, 2),
                             SiFive7GetCyclesSegmentedSeg2<mx>.c,
                             SiFive7GetCyclesSegmented<mx, eew, nf>.c);
      defvar Suffix = nf # "e" # eew;
      let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SegCycles)] in {
        // Does not chain so set latency high
        let Latency = !add(3, SegCycles) in
        foreach stem = ["WriteVLSEG", "WriteVLSEGFF"] in
          defm "" : LMULWriteResMX<stem # Suffix, [SiFive7VCQ, SiFive7VL], mx, IsWorst>;
        let Latency = 1 in
        defm "" : LMULWriteResMX<"WriteVSSEG" # Suffix, [SiFive7VCQ, SiFive7VS], mx, IsWorst>;
      }
    }
  }
}
// Strided (S), indexed-unordered (UX) and indexed-ordered (OX) segmented
// loads and stores, for every nf in 2..8 and every EEW.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  foreach nf = 2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar SegCycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
      defvar Suffix = "SEG" # nf # "e" # eew;
      let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, SegCycles)] in {
        // Does not chain so set latency high
        let Latency = !add(3, SegCycles) in
        foreach kind = ["S", "UX", "OX"] in
          defm "" : LMULWriteResMX<"WriteVL" # kind # Suffix, [SiFive7VCQ, SiFive7VL], mx, IsWorst>;
        let Latency = 1 in
        foreach kind = ["S", "UX", "OX"] in
          defm "" : LMULWriteResMX<"WriteVS" # kind # Suffix, [SiFive7VCQ, SiFive7VS], mx, IsWorst>;
      }
    }
  }
}
622
// 11. Vector Integer Arithmetic Instructions
// All of these occupy the VCQ for one cycle and then the arithmetic sequencer
// (VA) for 2 * LMUL cycles; only the latency differs between the groups.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar DLENParts = SiFive7GetCyclesDefault<mx>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DLENParts)] in {
    let Latency = 4 in
    foreach w = ["WriteVIALUV", "WriteVIALUX", "WriteVIALUI",
                 "WriteVICALUV", "WriteVICALUX", "WriteVICALUI",
                 "WriteVICALUMV", "WriteVICALUMX", "WriteVICALUMI",
                 "WriteVIMinMaxV", "WriteVIMinMaxX",
                 "WriteVIMergeV", "WriteVIMergeX", "WriteVIMergeI",
                 "WriteVIMovV", "WriteVIMovX", "WriteVIMovI"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, IsWorst>;

    let Latency = 8 in
    foreach w = ["WriteVShiftV", "WriteVShiftX", "WriteVShiftI",
                 "WriteVIMulV", "WriteVIMulX",
                 "WriteVIMulAddV", "WriteVIMulAddX"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, IsWorst>;

    // Mask results can't chain.
    let Latency = !add(DLENParts, 3) in
    foreach w = ["WriteVICmpV", "WriteVICmpX", "WriteVICmpI"] in
      defm "" : LMULWriteResMX<w, [SiFive7VCQ, SiFive7VA], mx, IsWorst>;
  }
}
// Vector integer extension: latency 4, default 2 * LMUL occupancy on VA.
foreach mx = SchedMxList in {
  defvar IsWorst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  defvar DLENParts = SiFive7GetCyclesDefault<mx>.c;
  let AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, DLENParts)],
      Latency = 4 in
  defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorst>;
}
// Vector integer divide. Cycles = factor(SEW) * (one-per-element cycles / 4);
// the same value is used for both the latency and the VA occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar IsWorst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    defvar ElemGroups = !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4);
    defvar DivCycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, ElemGroups);
    let Latency = DivCycles, AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, DivCycles)] in
    foreach w = ["WriteVIDivV", "WriteVIDivX"] in
      defm "" : LMULSEWWriteResMXSEW<w, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorst>;
  }
}
680
681// Widening
foreach mx = SchedMxListW in {
  defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  // Widening integer ALU, multiply and multiply-add all share one timing.
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVIWALUV", "WriteVIWALUX", "WriteVIWALUI",
                   "WriteVIWMulV", "WriteVIWMulX",
                   "WriteVIWMulAddV", "WriteVIWMulAddX"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
695// Narrowing
foreach mx = SchedMxListW in {
  // Narrowing shifts take their SiFive7VA occupancy from the narrowing table.
  defvar NumCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVNShiftV", "WriteVNShiftX", "WriteVNShiftI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
705
706// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Saturating ALU, averaging ALU, saturating multiply and scaling shift
  // all use latency 8 with the default per-LMUL occupancy.
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVSALUV", "WriteVSALUX", "WriteVSALUI",
                   "WriteVAALUV", "WriteVAALUX",
                   "WriteVSMulV", "WriteVSMulX",
                   "WriteVSShiftV", "WriteVSShiftX", "WriteVSShiftI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
723// Narrowing
foreach mx = SchedMxListW in {
  // Narrowing clips take their SiFive7VA occupancy from the narrowing table.
  defvar NumCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVNClipV", "WriteVNClipX", "WriteVNClipI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
733
734// 13. Vector Floating-Point Instructions
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
    defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;

    // Latency-8 group: add, multiply, fused multiply-add, reciprocal
    // estimate and integer-to-float conversion.
    let Latency = 8,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, NumCycles)] in
    foreach write = ["WriteVFALUV", "WriteVFALUF",
                     "WriteVFMulV", "WriteVFMulF",
                     "WriteVFMulAddV", "WriteVFMulAddF",
                     "WriteVFRecpV", "WriteVFCvtIToFV"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;

    // Latency-4 group: min/max and sign injection.
    let Latency = 4,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, NumCycles)] in
    foreach write = ["WriteVFMinMaxV", "WriteVFMinMaxF",
                     "WriteVFSgnjV", "WriteVFSgnjF"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
  }
}
foreach mx = SchedMxList in {
  defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;

  // Float-to-integer conversion: latency 8.
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA],
                           mx, Worst>;

  // Classify, merge and move: latency 4.
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVFClassV", "WriteVFMergeV", "WriteVFMovV"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;

  // Mask results can't chain, so compare latency grows with the occupancy.
  let Latency = !add(NumCycles, 3),
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVFCmpV", "WriteVFCmpF"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
// Vector FP square root and divide: both the latency and the SiFive7VA
// occupancy are (div/sqrt factor for SEW) * (one-per-element cycles / 4).
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar PerElement = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar DivCycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                            !div(PerElement, 4));
    defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    let Latency = DivCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, DivCycles)] in
    foreach write = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
  }
}
786
787// Widening
// Widening integer-to-float conversion iterates the non-FP widening SEW set.
foreach mx = SchedMxListW in {
  foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
    defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
    defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
    let Latency = 8,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, NumCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
    }
  }
}
foreach mx = SchedMxListFW in {
  // The occupancy depends only on LMUL, so it is computed once per mx and
  // shared by the per-SEW and the LMUL-only entries below.
  defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;

  // Per-(LMUL, SEW) widening FP entries.
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar WorstSEW =
        SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 8,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, NumCycles)] in
    foreach write = ["WriteVFWALUV", "WriteVFWALUF",
                     "WriteVFWMulV", "WriteVFWMulF",
                     "WriteVFWMulAddV", "WriteVFWMulAddF",
                     "WriteVFWCvtFToFV"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, WorstSEW>;
  }

  // Widening float-to-integer conversion is tracked per LMUL only.
  defvar WorstMX = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in {
    defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA],
                             mx, WorstMX>;
  }
}
815// Narrowing
// Narrowing float-to-integer conversion, tracked per LMUL only.
foreach mx = SchedMxListW in {
  defvar NumCycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA],
                           mx, Worst>;
}
// Narrowing integer-to-float and float-to-float conversions, per (LMUL, SEW).
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar NumCycles = SiFive7GetCyclesNarrowing<mx>.c;
    defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 8,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, NumCycles)] in
    foreach write = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
  }
}
833
834// 14. Vector Reduction Operations
// Integer reductions: the reduction cycle count is used both as the latency
// and as the SiFive7VA occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = RedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in
    foreach write = ["WriteVIRedV_From", "WriteVIRedMinMaxV_From"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
  }
}
847
// Widening integer reduction.
foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = RedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
    }
  }
}
857
// FP reductions. The plain and min/max forms use the regular reduction cycle
// count; the ordered form (RedOV) uses the ordered-reduction cycle count.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;

    let Latency = UnorderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, UnorderedCycles)] in
    foreach write = ["WriteVFRedV_From", "WriteVFRedMinMaxV_From"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;

    let Latency = OrderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
    }
  }
}
874
// Widening FP reductions: the plain form uses the regular reduction cycle
// count, the ordered form (WRedOV) the ordered-reduction cycle count.
foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar Worst =
        SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, isF=1>.c;
    defvar UnorderedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar OrderedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;

    let Latency = UnorderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, UnorderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
    }

    let Latency = OrderedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrderedCycles)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From",
                                     [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
    }
  }
}
888
889// 15. Vector Mask Instructions
foreach mx = SchedMxList in {
  // Mask operations take their SiFive7VA occupancy from the VMask table.
  defvar NumCycles = SiFive7GetCyclesVMask<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVMALUV", "WriteVMPopV",
                   "WriteVMFFSV", "WriteVMSFSV"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
foreach mx = SchedMxList in {
  // WriteVIotaV and WriteVIdxV use the default per-LMUL occupancy instead of
  // the VMask table.
  defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVIotaV", "WriteVIdxV"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
908
909// 16. Vector Permutation Instructions
// Scalar <-> vector element moves: latency 4, SiFive7VA held for one cycle
// (acquired at cycle 1, released at cycle 2).
let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, 2] in
foreach write = [WriteVMovSX, WriteVMovXS, WriteVMovSF, WriteVMovFS] in
  def : WriteRes<write, [SiFive7VCQ, SiFive7VA]>;
// Register gathers with a scalar or immediate index.
foreach mx = SchedMxList in {
  defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVRGatherVX", "WriteVRGatherVI"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
924
// Vector-indexed gathers and compress: occupancy is the one-per-element
// cycle count, and latency is that count plus 3.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar PerElement = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar Worst = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = !add(PerElement, 3),
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, PerElement)] in
    foreach write = ["WriteVRGatherVV", "WriteVRGatherEI16VV",
                     "WriteVCompressV"] in
      defm "" : LMULSEWWriteResMXSEW<write, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, Worst>;
  }
}
936
// Slides (including the slide1 forms).
foreach mx = SchedMxList in {
  defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach write = ["WriteVSlideUpX", "WriteVSlideDownX", "WriteVSlideI",
                   "WriteVISlide1X", "WriteVFSlide1F"] in
    defm "" : LMULWriteResMX<write, [SiFive7VCQ, SiFive7VA], mx, Worst>;
}
948
949// VMov*V is LMUL Aware
// WriteVMov<N>V for N in {1, 2, 4, 8}: SiFive7VA is held for 2 * N cycles
// (2, 4, 8 and 16 respectively), with a fixed latency of 4.
foreach nregs = [1, 2, 4, 8] in {
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, !mul(2, nregs))] in
  def : WriteRes<!cast<SchedWrite>("WriteVMov" # nregs # "V"),
                 [SiFive7VCQ, SiFive7VA]>;
}
958
959// Others
// WriteNop consumes no resources; WriteCSR and WriteRdVLENB both use
// SiFive7PipeB, and only WriteRdVLENB overrides the latency here.
def : WriteRes<WriteNop, []>;
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
let Latency = 3 in {
  def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;
}

// COPY is scheduled like WriteIALU.
def : InstRW<[WriteIALU], (instrs COPY)>;
966
967// VCIX
968//
// In principle we don't know the latency of any VCIX instructions (they
// depend on a particular coprocessor implementation). However, the default
971// latency of 1 can lead to issues [1]. So instead we set the latency to the
972// default provided by `SiFive7GetCyclesDefault`. This is still not accurate
973// and can lead to suboptimal codegen, but should hopefully be a better
974// starting point.
975//
976// [1] https://github.com/llvm/llvm-project/issues/83391
foreach mx = SchedMxList in {
  defvar NumCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar Worst = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Every VCIX write gets the default per-LMUL cycle count as both its
  // latency and its SiFive7VA occupancy. The "WriteVC_V_" family is emitted
  // first, then the "WriteVC_" family, each followed by its FPR variants.
  let Latency = NumCycles,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, NumCycles)] in
  foreach prefix = ["WriteVC_V_", "WriteVC_"] in {
    foreach operands = ["I", "X", "IV", "VV", "XV",
                        "IVV", "IVW", "VVV", "VVW", "XVV", "XVW"] in
      defm "" : LMULWriteResMX<prefix # operands, [SiFive7VCQ, SiFive7VA],
                               mx, Worst>;
    foreach fpr = ["FPR16", "FPR32", "FPR64"] in
      foreach operands = ["V", "VV", "VW"] in
        defm "" : LMULWriteResMX<prefix # fpr # operands,
                                 [SiFive7VCQ, SiFive7VA], mx, Worst>;
  }
}
1017
1018//===----------------------------------------------------------------------===//
1019
1020// Bypass and advance
// Reads that use SiFive7AnyToGPRBypass with its default cycle count.
foreach read = [ReadJmp, ReadJalr, ReadStoreData,
                ReadIALU, ReadIALU32,
                ReadShiftImm, ReadShiftImm32,
                ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<read>;

// Reads with a ReadAdvance of 0: CSR, memory base, divide/remainder,
// multiply and atomics.
foreach read = [ReadCSR, ReadMemBase,
                ReadIDiv, ReadIDiv32, ReadIRem, ReadIRem32,
                ReadIMul, ReadIMul32,
                ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
                ReadAtomicLDW, ReadAtomicLDD,
                ReadAtomicSTW, ReadAtomicSTD] in
  def : ReadAdvance<read, 0>;
// All scalar floating-point reads have a ReadAdvance of 0.
foreach read = [
    // Memory
    ReadFStoreData, ReadFMemBase,
    // Add, multiply and fused multiply-add
    ReadFAdd16, ReadFAdd32, ReadFAdd64,
    ReadFMul16, ReadFMA16, ReadFMA16Addend,
    ReadFMul32, ReadFMul64,
    ReadFMA32, ReadFMA32Addend,
    ReadFMA64, ReadFMA64Addend,
    // Divide and square root
    ReadFDiv16, ReadFDiv32, ReadFDiv64,
    ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
    // Compare, sign injection, min/max
    ReadFCmp16, ReadFCmp32, ReadFCmp64,
    ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
    ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
    // Conversions
    ReadFCvtF16ToI32, ReadFCvtF16ToI64,
    ReadFCvtF32ToI32, ReadFCvtF32ToI64,
    ReadFCvtF64ToI32, ReadFCvtF64ToI64,
    ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
    ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
    ReadFCvtF32ToF64, ReadFCvtF64ToF32,
    ReadFCvtF16ToF32, ReadFCvtF32ToF16,
    ReadFCvtF16ToF64, ReadFCvtF64ToF16,
    // Moves between FPR and GPR
    ReadFMovF16ToI16, ReadFMovI16ToF16,
    ReadFMovF32ToI32, ReadFMovI32ToF32,
    ReadFMovF64ToI64, ReadFMovI64ToF64,
    // Classify
    ReadFClass16, ReadFClass32, ReadFClass64
  ] in
  def : ReadAdvance<read, 0>;
1102
// The SFB reads use the bypass class with an explicit cycle count of 0.
foreach read = [ReadSFBJmp, ReadSFBALU] in
  def : SiFive7AnyToGPRBypass<read, 0>;
1105
1106// Bitmanip
// Rotates and count-leading/trailing-zeros use the GPR bypass.
foreach read = [ReadRotateImm, ReadRotateImm32,
                ReadRotateReg, ReadRotateReg32,
                ReadCLZ, ReadCLZ32, ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<read>;
// Population count does not use the bypass class.
foreach read = [ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<read, 0>;
foreach read = [ReadORCB, ReadIMinMax, ReadREV8,
                ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<read>;
// Single-bit instructions
foreach read = [ReadSingleBit, ReadSingleBitImm] in
  def : SiFive7AnyToGPRBypass<read>;
1125
1126// 6. Configuration-Setting Instructions
// Both vsetvl read classes have a ReadAdvance of 2.
foreach read = [ReadVSETVLI, ReadVSETVL] in
  def : ReadAdvance<read, 2>;
1129
1130// 7. Vector Loads and Stores
// LMUL-independent reads (ReadAdvance of 0).
foreach read = [ReadVLDX, ReadVSTX, ReadVLDSX, ReadVSTSX] in
  def : ReadAdvance<read, 0>;
// Per-LMUL reads (ReadAdvance of 0).
foreach name = ["ReadVSTEV", "ReadVSTM",
                "ReadVSTS8V", "ReadVSTS16V", "ReadVSTS32V", "ReadVSTS64V",
                "ReadVLDUXV", "ReadVLDOXV",
                "ReadVSTUX8", "ReadVSTUX16", "ReadVSTUX32", "ReadVSTUX64",
                "ReadVSTUXV",
                "ReadVSTUX8V", "ReadVSTUX16V", "ReadVSTUX32V",
                "ReadVSTUX64V",
                "ReadVSTOX8", "ReadVSTOX16", "ReadVSTOX32", "ReadVSTOX64",
                "ReadVSTOXV",
                "ReadVSTOX8V", "ReadVSTOX16V", "ReadVSTOX32V",
                "ReadVSTOX64V"] in
  defm "" : LMULReadAdvance<name, 0>;
// LMUL Aware
foreach read = [ReadVST1R, ReadVST2R, ReadVST4R, ReadVST8R] in
  def : ReadAdvance<read, 0>;
1166
// 11. Vector Integer Arithmetic Instructions
// All vector integer reads have a ReadAdvance of 0. They are grouped by the
// multiclass that expands them.
// Per-LMUL reads.
foreach name = ["ReadVIALUV", "ReadVIALUX",
                "ReadVExtV",
                "ReadVICALUV", "ReadVICALUX",
                "ReadVShiftV", "ReadVShiftX",
                "ReadVICmpV", "ReadVICmpX",
                "ReadVIMinMaxV", "ReadVIMinMaxX",
                "ReadVIMulV", "ReadVIMulX",
                "ReadVIMulAddV", "ReadVIMulAddX",
                "ReadVIMergeV", "ReadVIMergeX",
                "ReadVIMovV", "ReadVIMovX"] in
  defm : LMULReadAdvance<name, 0>;
// Reads expanded with LMULReadAdvanceW (widening and narrowing operations).
foreach name = ["ReadVIWALUV", "ReadVIWALUX",
                "ReadVNShiftV", "ReadVNShiftX",
                "ReadVIWMulV", "ReadVIWMulX",
                "ReadVIWMulAddV", "ReadVIWMulAddX"] in
  defm : LMULReadAdvanceW<name, 0>;
// Per-(LMUL, SEW) reads.
foreach name = ["ReadVIDivV", "ReadVIDivX"] in
  defm : LMULSEWReadAdvance<name, 0>;
1197
// 12. Vector Fixed-Point Arithmetic Instructions
// All fixed-point reads have a ReadAdvance of 0.
foreach name = ["ReadVSALUV", "ReadVSALUX",
                "ReadVAALUV", "ReadVAALUX",
                "ReadVSMulV", "ReadVSMulX",
                "ReadVSShiftV", "ReadVSShiftX"] in
  defm "" : LMULReadAdvance<name, 0>;
// Narrowing clips are expanded with LMULReadAdvanceW.
foreach name = ["ReadVNClipV", "ReadVNClipX"] in
  defm "" : LMULReadAdvanceW<name, 0>;
1209
// 13. Vector Floating-Point Instructions
// All vector floating-point reads have a ReadAdvance of 0. They are grouped
// by the multiclass that expands them.
foreach name = ["ReadVFALUV", "ReadVFALUF",
                "ReadVFMulV", "ReadVFMulF",
                "ReadVFDivV", "ReadVFDivF",
                "ReadVFMulAddV", "ReadVFMulAddF",
                "ReadVFSqrtV", "ReadVFRecpV",
                "ReadVFMinMaxV", "ReadVFMinMaxF",
                "ReadVFSgnjV", "ReadVFSgnjF",
                "ReadVFCvtIToFV"] in
  defm "" : LMULSEWReadAdvanceF<name, 0>;
foreach name = ["ReadVFWALUV", "ReadVFWALUF",
                "ReadVFWMulV", "ReadVFWMulF",
                "ReadVFWMulAddV", "ReadVFWMulAddF",
                "ReadVFWCvtFToFV",
                "ReadVFNCvtIToFV", "ReadVFNCvtFToFV"] in
  defm "" : LMULSEWReadAdvanceFW<name, 0>;
foreach name = ["ReadVFCmpV", "ReadVFCmpF",
                "ReadVFClassV",
                "ReadVFMergeV", "ReadVFMergeF",
                "ReadVFMovF",
                "ReadVFCvtFToIV"] in
  defm "" : LMULReadAdvance<name, 0>;
// The remaining conversions each use a different multiclass.
defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
1245
// 14. Vector Reduction Operations
// Every reduction read has a ReadAdvance of 0.
foreach read = [ReadVIRedV, ReadVIRedV0,
                ReadVIWRedV, ReadVIWRedV0,
                ReadVFRedV, ReadVFRedV0,
                ReadVFRedOV, ReadVFRedOV0,
                ReadVFWRedV, ReadVFWRedV0,
                ReadVFWRedOV, ReadVFWRedOV0] in
  def : ReadAdvance<read, 0>;
1259
// 15. Vector Mask Instructions
// Every mask-instruction read has a ReadAdvance of 0.
foreach name = ["ReadVMALUV", "ReadVMPopV", "ReadVMFFSV", "ReadVMSFSV",
                "ReadVIotaV"] in
  defm "" : LMULReadAdvance<name, 0>;
1266
// 16. Vector Permutation Instructions
// All permutation reads have a ReadAdvance of 0.
// Scalar <-> vector element moves.
foreach read = [ReadVMovXS, ReadVMovSX_V, ReadVMovSX_X,
                ReadVMovFS, ReadVMovSF_V, ReadVMovSF_F] in
  def : ReadAdvance<read, 0>;
// Per-LMUL reads: slides and gathers with a scalar/immediate index.
foreach name = ["ReadVISlideV", "ReadVISlideX",
                "ReadVFSlideV", "ReadVFSlideF",
                "ReadVRGatherVX_data", "ReadVRGatherVX_index",
                "ReadVRGatherVI_data"] in
  defm "" : LMULReadAdvance<name, 0>;
// Per-(LMUL, SEW) reads: vector-indexed gathers and compress.
foreach name = ["ReadVRGatherVV_data", "ReadVRGatherVV_index",
                "ReadVRGatherEI16VV_data", "ReadVRGatherEI16VV_index",
                "ReadVCompressV"] in
  defm "" : LMULSEWReadAdvance<name, 0>;
// LMUL Aware
foreach read = [ReadVMov1V, ReadVMov2V, ReadVMov4V, ReadVMov8V] in
  def : ReadAdvance<read, 0>;
1291
1292// Others
// The mask read and every passthru read have a ReadAdvance of 0.
foreach read = [ReadVMask, ReadVPassthru_WorstCase] in
  def : ReadAdvance<read, 0>;
foreach mx = SchedMxList in {
  // One passthru read per LMUL, plus one per (LMUL, SEW) pair.
  defvar PassthruMX = "ReadVPassthru_" # mx;
  def : ReadAdvance<!cast<SchedRead>(PassthruMX), 0>;
  foreach sew = SchedSEWSet<mx>.val in {
    def : ReadAdvance<!cast<SchedRead>(PassthruMX # "_E" # sew), 0>;
  }
}
1300
1301//===----------------------------------------------------------------------===//
1302// Unsupported extensions
// Each Unsupported* multiclass below is instantiated once for this model.
// NOTE(review): presumably these mark the scheduling classes of the named
// extensions as unsupported on SiFive7 -- confirm against the multiclass
// definitions, which are not visible here.
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZvk;
1309}
1310