//==- RISCVSchedSiFive7.td - SiFive7 Scheduling Definitions --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

/// LMUL-only worst-case test.
/// c is set when mx is the largest LMUL found in MxList (as reported by
/// LargestLMUL<MxList>.r), which is the worst case LMUL on the SiFive7.
class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
  bit c = !eq(mx, LargestLMUL<MxList>.r);
}

/// LMUL + SEW worst-case test.
/// c is set only when both of the following hold:
///   * mx is the largest LMUL found in MxList, and
///   * sew is the smallest SEW for mx (SmallestSEW<mx, isF>.r).
/// isF is forwarded unchanged to SmallestSEW and defaults to 0.
class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
                              bit isF = 0> {
  defvar IsLargestLMUL = !eq(mx, LargestLMUL<MxList>.r);
  defvar IsSmallestSEW = !eq(sew, SmallestSEW<mx, isF>.r);
  bit c = !and(IsLargestLMUL, IsSmallestSEW);
}

/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
class SiFive7GetCyclesDefault<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 2,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// Cycle count table for narrowing operations: 4 * LMUL with a minimum of 1.
/// There is intentionally no M8 entry in this table, so instantiating it with
/// "M8" matches no !cond clause.
class SiFive7GetCyclesNarrowing<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// Cycle count table for mask operations: 2 cycles for M8 and 1 cycle for
/// every other LMUL.
class SiFive7GetCyclesVMask<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 1,
    !eq(mx, "M4") : 1,
    !eq(mx, "M8") : 2,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

/// VLDM and VSTM can't read/write more than 2 DLENs of data.
/// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
class SiFive7GetMaskLoadStoreCycles<string mx> {
  int c = !cond(
    !eq(mx, "M8") : 2,
    true : 1
  );
}

// Cycles for nf=2 segmented loads and stores are calculated using the
// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
class SiFive7GetCyclesSegmentedSeg2<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 8,
    !eq(mx, "M4") : 16,
    !eq(mx, "M8") : 32,
    !eq(mx, "MF2") : 2,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

// Cycles for segmented loads and stores are calculated using the
// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 512;
  defvar DLEN = 256;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew)
  );
  // We can calculate ceil(a/b) using (a + b - 1) / b.
  defvar a = !mul(sew, nf);
  defvar b = DLEN;
  int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
}

class SiFive7GetCyclesOnePerElement<string mx, int sew> {
  // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
  // to use a different VLEN, this model will not make scheduling decisions
  // based on the user specified VLEN.
  // c = ceil(VLEN / SEW) * LMUL
  // Note: c >= 1 since the smallest VLEN / SEW quotient is 512 / 64 = 8, and
  // the largest division performed on that quotient is in the MF8 case with
  // division by 8. Therefore, there is no need to ceil the result.
  // Note that the field named VLEN below holds 512 / sew (the element count
  // at LMUL=1), not the raw vector length.
  int VLEN = !div(512, sew);
  int c = !cond(
    !eq(mx, "M1") : VLEN,
    !eq(mx, "M2") : !mul(VLEN, 2),
    !eq(mx, "M4") : !mul(VLEN, 4),
    !eq(mx, "M8") : !mul(VLEN, 8),
    !eq(mx, "MF2") : !div(VLEN, 2),
    !eq(mx, "MF4") : !div(VLEN, 4),
    !eq(mx, "MF8") : !div(VLEN, 8)
  );
}

/// Per-SEW multiplier used by the vector divide and square-root cycle
/// computations in this file.
class SiFive7GetDivOrSqrtFactor<int sew> {
  int c = !cond(
    // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
    !eq(sew, 8) : 15,
    !eq(sew, 16) : 15,
    !eq(sew, 32) : 28,
    !eq(sew, 64) : 57
  );
}

/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
/// cycles.
class SiFive7GetReductionCycles<string mx, int sew> {
  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
  // VLUpperBound=(VLEN*LMUL)/SEW.
  defvar VLEN = 512;
  defvar DLEN = !div(VLEN, 2);
  // 2*LMUL (clamped to at least 1) is exactly the SiFive7GetCyclesDefault
  // table, so reuse it instead of repeating the table here.
  defvar TwoTimesLMUL = SiFive7GetCyclesDefault<mx>.c;
  int c = !add(
    TwoTimesLMUL,
    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
  );
}

/// Cycles for ordered reductions take approximately 6*VL cycles
class SiFive7GetOrderedReductionCycles<string mx, int sew> {
  defvar VLEN = 512;
  // (VLEN * LMUL) / SEW
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew)
  );
  int c = !mul(6, VLUpperBound);
}

/// ReadAdvance of `cycles` (2 by default) for `read`, valid for the integer
/// ALU, shift, bitmanip, short-forward-branch, multiply, divide/remainder and
/// integer load writes listed below.
class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
    : ReadAdvance<read, cycles, [WriteIALU, WriteIALU32,
                                 WriteShiftImm, WriteShiftImm32,
                                 WriteShiftReg, WriteShiftReg32,
                                 WriteSHXADD, WriteSHXADD32,
                                 WriteRotateImm, WriteRotateImm32,
                                 WriteRotateReg, WriteRotateReg32,
                                 WriteSingleBit, WriteSingleBitImm,
                                 WriteBEXT, WriteBEXTI,
                                 WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
                                 WriteCPOP, WriteCPOP32,
                                 WriteREV8, WriteORCB, WriteIMinMax, WriteSFB,
                                 WriteIMul, WriteIMul32,
                                 WriteIDiv, WriteIDiv32,
                                 WriteIRem, WriteIRem32,
                                 WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;

// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
  let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
  let IssueWidth = 2;        // 2 micro-ops are dispatched per cycle.
  let LoadLatency = 3;
  let MispredictPenalty = 3;
  let CompleteModel = 0;
  let EnableIntervals = true;
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
                             HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                             HasStdExtZkr];
}

// The SiFive7 microarchitecture has three pipelines: A, B, V.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
// The V pipeline is modeled by the VCQ, VA, VL, and VS resources.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA       : ProcResource<1>;
def SiFive7PipeB       : ProcResource<1>;
def SiFive7IDiv        : ProcResource<1>; // Int Division
def SiFive7FDiv        : ProcResource<1>; // FP Division/Sqrt
def SiFive7VA          : ProcResource<1>; // Arithmetic sequencer
def SiFive7VL          : ProcResource<1>; // Load sequencer
def SiFive7VS          : ProcResource<1>; // Store sequencer
// The VCQ accepts instructions from the A Pipe and holds them until the
// vector unit is ready to dequeue them. The unit dequeues up to one instruction
// per cycle, in order, as soon as the sequencer for that type of instruction is
// available. This resource is meant to be used for 1 cycle by all vector
// instructions, to model that only one vector instruction may be dequeued at a
// time. The actual dequeueing into the sequencer is modeled by the VA, VL, and
// VS sequencer resources below. Each of them will only accept a single
// instruction at a time and remain busy for the number of cycles associated
// with that instruction.
def SiFive7VCQ         : ProcResource<1>; // Vector Command Queue
}

def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;

// Branching
let Latency = 3 in {
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
def : WriteRes<WriteJal, [SiFive7PipeB]>;
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
}

// Short forward branch
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
  let Latency = 3;
  let NumMicroOps = 2;
}

// Integer arithmetic and logic
let Latency = 3 in {
def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
def : WriteRes<WriteIALU32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftImm32, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg, [SiFive7PipeAB]>;
def : WriteRes<WriteShiftReg32, [SiFive7PipeAB]>;
}

// Integer multiplication
let Latency = 3 in {
def : WriteRes<WriteIMul, [SiFive7PipeB]>;
def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
}

// Integer division
// Pipe B is released after 1 cycle; the divider stays busy for the remainder
// of the operation.
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 66;
  let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 34;
  let ReleaseAtCycles = [1, 33];
}

// Integer remainder
def : WriteRes<WriteIRem, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 66;
  let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIRem32, [SiFive7PipeB, SiFive7IDiv]> {
  let Latency = 34;
  let ReleaseAtCycles = [1, 33];
}

// Bitmanip
let Latency = 3 in {
// Rotates are in the late-B ALU.
def : WriteRes<WriteRotateImm, [SiFive7PipeB]>;
def : WriteRes<WriteRotateImm32, [SiFive7PipeB]>;
def : WriteRes<WriteRotateReg, [SiFive7PipeB]>;
def : WriteRes<WriteRotateReg32, [SiFive7PipeB]>;

// clz[w]/ctz[w] are in the late-B ALU.
def : WriteRes<WriteCLZ, [SiFive7PipeB]>;
def : WriteRes<WriteCLZ32, [SiFive7PipeB]>;
def : WriteRes<WriteCTZ, [SiFive7PipeB]>;
def : WriteRes<WriteCTZ32, [SiFive7PipeB]>;

// cpop[w] look exactly like multiply.
def : WriteRes<WriteCPOP, [SiFive7PipeB]>;
def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;

// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]>;

// min/max are in the late-B ALU
def : WriteRes<WriteIMinMax, [SiFive7PipeB]>;

// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;

// shNadd[.uw] is on the early-B and late-B ALUs.
def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
}

// Single-bit instructions
// BEXT[I] instruction is available on all ALUs and the other instructions
// are only available on the SiFive7B pipe.
let Latency = 3 in {
def : WriteRes<WriteSingleBit, [SiFive7PipeB]>;
def : WriteRes<WriteSingleBitImm, [SiFive7PipeB]>;
def : WriteRes<WriteBEXT, [SiFive7PipeAB]>;
def : WriteRes<WriteBEXTI, [SiFive7PipeAB]>;
}

// Memory
def : WriteRes<WriteSTB, [SiFive7PipeA]>;
def : WriteRes<WriteSTH, [SiFive7PipeA]>;
def : WriteRes<WriteSTW, [SiFive7PipeA]>;
def : WriteRes<WriteSTD, [SiFive7PipeA]>;
def : WriteRes<WriteFST16, [SiFive7PipeA]>;
def : WriteRes<WriteFST32, [SiFive7PipeA]>;
def : WriteRes<WriteFST64, [SiFive7PipeA]>;

let Latency = 3 in {
def : WriteRes<WriteLDB, [SiFive7PipeA]>;
def : WriteRes<WriteLDH, [SiFive7PipeA]>;
def : WriteRes<WriteLDW, [SiFive7PipeA]>;
def : WriteRes<WriteLDD, [SiFive7PipeA]>;
}

let Latency = 2 in {
def : WriteRes<WriteFLD16, [SiFive7PipeA]>;
def : WriteRes<WriteFLD32, [SiFive7PipeA]>;
def : WriteRes<WriteFLD64, [SiFive7PipeA]>;
}

// Atomic memory
def : WriteRes<WriteAtomicSTW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicSTD, [SiFive7PipeA]>;

// These atomic writes issue on pipe A with a latency of 3.
foreach write = [WriteAtomicW, WriteAtomicD,
                 WriteAtomicLDW, WriteAtomicLDD] in
  def : WriteRes<write, [SiFive7PipeA]> { let Latency = 3; }

// Half precision.
// Add/mul/fma: latency 5. Sign-injection and min/max: latency 3.
foreach write = [WriteFAdd16, WriteFMul16, WriteFMA16] in
  def : WriteRes<write, [SiFive7PipeB]> { let Latency = 5; }
foreach write = [WriteFSGNJ16, WriteFMinMax16] in
  def : WriteRes<write, [SiFive7PipeB]> { let Latency = 3; }

// Divide/sqrt release pipe B after 1 cycle and keep the FDiv resource for the
// remaining cycles.
foreach write = [WriteFDiv16, WriteFSqrt16] in
  def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]> {
    let Latency = 14;
    let ReleaseAtCycles = [1, 13];
  }

// Single precision.
foreach write = [WriteFAdd32, WriteFMul32, WriteFMA32] in
  def : WriteRes<write, [SiFive7PipeB]> { let Latency = 5; }
foreach write = [WriteFSGNJ32, WriteFMinMax32] in
  def : WriteRes<write, [SiFive7PipeB]> { let Latency = 3; }

foreach write = [WriteFDiv32, WriteFSqrt32] in
  def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]> {
    let Latency = 27;
    let ReleaseAtCycles = [1, 26];
  }

// Double precision
foreach write = [WriteFAdd64, WriteFMul64, WriteFMA64] in
  def : WriteRes<write, [SiFive7PipeB]> { let Latency = 7; }
foreach write = [WriteFSGNJ64, WriteFMinMax64] in
  def : WriteRes<write, [SiFive7PipeB]> { let Latency = 3; }

foreach write = [WriteFDiv64, WriteFSqrt64] in
  def : WriteRes<write, [SiFive7PipeB, SiFive7FDiv]> {
    let Latency = 56;
    let ReleaseAtCycles = [1, 55];
  }

// Conversions
// All conversions, classifies, compares and int<->fp moves use pipe B with a
// latency of 3.
foreach write = [WriteFCvtI32ToF16, WriteFCvtI32ToF32, WriteFCvtI32ToF64,
                 WriteFCvtI64ToF16, WriteFCvtI64ToF32, WriteFCvtI64ToF64,
                 WriteFCvtF16ToI32, WriteFCvtF16ToI64,
                 WriteFCvtF16ToF32, WriteFCvtF16ToF64,
                 WriteFCvtF32ToI32, WriteFCvtF32ToI64,
                 WriteFCvtF32ToF16, WriteFCvtF32ToF64,
                 WriteFCvtF64ToI32, WriteFCvtF64ToI64,
                 WriteFCvtF64ToF16, WriteFCvtF64ToF32,
                 WriteFClass16, WriteFClass32, WriteFClass64,
                 WriteFCmp16, WriteFCmp32, WriteFCmp64,
                 WriteFMovI16ToF16, WriteFMovF16ToI16,
                 WriteFMovI32ToF32, WriteFMovF32ToI32,
                 WriteFMovI64ToF64, WriteFMovF64ToI64] in
  def : WriteRes<write, [SiFive7PipeB]> { let Latency = 3; }

// 6. Configuration-Setting Instructions
foreach write = [WriteVSETVLI, WriteVSETIVLI, WriteVSETVL] in
  def : WriteRes<write, [SiFive7PipeA]> { let Latency = 3; }

// 7. Vector Loads and Stores
// Unit-stride loads and stores can operate at the full bandwidth of the memory
// pipe. The memory pipe is DLEN bits wide on x280.
// Unit-stride loads (including fault-only-first) and stores.
foreach mx = SchedMxList in {
  defvar BusyCycles = SiFive7GetCyclesDefault<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // Resource timing shared by loads and stores: the first resource (VCQ) is
  // held during cycle 0 only, the second (the sequencer) from cycle 1 until
  // cycle 1 + BusyCycles.
  let AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, BusyCycles)] in {
    let Latency = 4 in {
      defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
      defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    }
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}

// Mask loads and stores; occupancy comes from SiFive7GetMaskLoadStoreCycles.
foreach mx = SchedMxList in {
  defvar BusyCycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, BusyCycles)] in {
    let Latency = 4 in
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    let Latency = 1 in
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
  }
}

// Strided loads and stores operate at one element per cycle and should be
// scheduled accordingly. Indexed loads and stores operate at one element per
// cycle, and they stall the machine until all addresses have been generated,
// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
// specific suffixes, but since SEW is already encoded in the name of the
// resource, we do not need to use LMULSEWXXX constructors. However, we do
// use the SEW from the name to determine the number of Cycles.

// SEW=8 strided and indexed loads/stores.
foreach mx = SchedMxList in {
  defvar StrideX0Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar ElemCycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
  defvar WorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  // NOTE(review): the first latency/acquire/release triple presumably applies
  // when VLDSX0Pred matches and the second otherwise -- confirm against the
  // definition of LMULWriteResMXVariant. This defm deliberately stays outside
  // the `let` below so that no fields of its records are overridden.
  defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS8", VLDSX0Pred, [SiFive7VCQ, SiFive7VL],
                                       4, [0, 1], [1, !add(1, StrideX0Cycles)], !add(3, ElemCycles),
                                       [0, 1], [1, !add(1, ElemCycles)], mx, WorstCase>;
  // Timing shared by the indexed loads and the strided/indexed stores: the
  // first resource (VCQ) is held during cycle 0 only, the sequencer from
  // cycle 1 until cycle 1 + ElemCycles.
  let AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, ElemCycles)] in {
    let Latency = !add(3, ElemCycles) in {
      defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
      defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VCQ, SiFive7VL], mx, WorstCase>;
    }
    let Latency = 1 in {
      defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
      defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
      defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VCQ, SiFive7VS], mx, WorstCase>;
    }
  }
}
// TODO: The MxLists need to be filtered by EEW. We only need to support
// LMUL >= SEW_min/ELEN. Here, the smallest EEW prevents us from having MF8
// since LMUL >= 16/64.
502foreach mx = ["MF4", "MF2", "M1", "M2", "M4", "M8"] in { 503 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; 504 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c; 505 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 506 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS16", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], 507 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), 508 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; 509 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 510 defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 511 defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 512 } 513 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 514 defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 515 defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 516 defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 517 } 518} 519foreach mx = ["MF2", "M1", "M2", "M4", "M8"] in { 520 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; 521 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c; 522 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 523 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS32", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], 524 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), 525 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; 526 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 527 defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 528 defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 529 } 530 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 531 defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VCQ, SiFive7VS], mx, 
IsWorstCase>; 532 defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 533 defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 534 } 535} 536foreach mx = ["M1", "M2", "M4", "M8"] in { 537 defvar VLDSX0Cycles = SiFive7GetCyclesDefault<mx>.c; 538 defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c; 539 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 540 defm SiFive7 : LMULWriteResMXVariant<"WriteVLDS64", VLDSX0Pred, [SiFive7VCQ, SiFive7VL], 541 4, [0, 1], [1, !add(1, VLDSX0Cycles)], !add(3, Cycles), 542 [0, 1], [1, !add(1, Cycles)], mx, IsWorstCase>; 543 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 544 defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 545 defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 546 } 547 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 548 defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 549 defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 550 defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 551 } 552} 553 554// VLD*R is LMUL aware 555let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in 556 def : WriteRes<WriteVLD1R, [SiFive7VCQ, SiFive7VL]>; 557let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in 558 def : WriteRes<WriteVLD2R, [SiFive7VCQ, SiFive7VL]>; 559let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in 560 def : WriteRes<WriteVLD4R, [SiFive7VCQ, SiFive7VL]>; 561let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in 562 def : WriteRes<WriteVLD8R, [SiFive7VCQ, SiFive7VL]>; 563// VST*R is LMUL aware 564let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 2)] in 565 def : 
WriteRes<WriteVST1R, [SiFive7VCQ, SiFive7VS]>; 566let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 4)] in 567 def : WriteRes<WriteVST2R, [SiFive7VCQ, SiFive7VS]>; 568let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 8)] in 569 def : WriteRes<WriteVST4R, [SiFive7VCQ, SiFive7VS]>; 570let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, 16)] in 571 def : WriteRes<WriteVST8R, [SiFive7VCQ, SiFive7VS]>; 572 573// Segmented Loads and Stores 574// Unit-stride segmented loads and stores are effectively converted into strided 575// segment loads and stores. Strided segment loads and stores operate at up to 576// one segment per cycle if the segment fits within one aligned memory beat. 577// Indexed segment loads and stores operate at the same rate as strided ones, 578// but they stall the machine until all addresses have been generated. 579foreach mx = SchedMxList in { 580 foreach eew = [8, 16, 32, 64] in { 581 defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c; 582 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 583 // Does not chain so set latency high 584 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 585 defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 586 defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 587 } 588 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in 589 defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 590 foreach nf=3-8 in { 591 defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c; 592 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 593 // Does not chain so set latency high 594 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 595 defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VCQ, 
SiFive7VL], mx, IsWorstCase>; 596 defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 597 } 598 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in 599 defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 600 } 601 } 602} 603foreach mx = SchedMxList in { 604 foreach nf=2-8 in { 605 foreach eew = [8, 16, 32, 64] in { 606 defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c; 607 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 608 // Does not chain so set latency high 609 let Latency = !add(3, Cycles), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 610 defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 611 defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 612 defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VL], mx, IsWorstCase>; 613 } 614 let Latency = 1, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 615 defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 616 defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 617 defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VCQ, SiFive7VS], mx, IsWorstCase>; 618 } 619 } 620 } 621} 622 623// 11. 
Vector Integer Arithmetic Instructions 624foreach mx = SchedMxList in { 625 defvar Cycles = SiFive7GetCyclesDefault<mx>.c; 626 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 627 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 628 defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 629 defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 630 defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 631 defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 632 defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 633 defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 634 defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 635 defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 636 defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 637 defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 638 defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 639 defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 640 defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 641 defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 642 defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 643 defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 644 defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 645 } 646 let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 647 defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 648 defm "" : 
LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 649 defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 650 defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 651 defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 652 defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 653 defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 654 } 655 // Mask results can't chain. 656 let Latency = !add(Cycles, 3), AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 657 defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 658 defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 659 defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 660 } 661} 662foreach mx = SchedMxList in { 663 defvar Cycles = SiFive7GetCyclesDefault<mx>.c; 664 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c; 665 let Latency = 4, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 666 defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; 667 } 668} 669foreach mx = SchedMxList in { 670 foreach sew = SchedSEWSet<mx>.val in { 671 defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c, 672 !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4)); 673 defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c; 674 let Latency = Cycles, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in { 675 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; 676 defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>; 677 } 678 } 679} 680 681// Widening 682foreach mx = SchedMxListW in { 683 defvar Cycles = SiFive7GetCyclesDefault<mx>.c; 684 defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, 
SchedMxListW>.c;
  let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
    defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

// Narrowing integer shifts. Occupancy is taken from the narrowing cycle table
// rather than the default one.
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVNShiftV", "WriteVNShiftX", "WriteVNShiftI"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVSALUV", "WriteVSALUX", "WriteVSALUI",
                "WriteVAALUV", "WriteVAALUX",
                "WriteVSMulV", "WriteVSMulX",
                "WriteVSShiftV", "WriteVSShiftX", "WriteVSShiftI"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// Narrowing fixed-point clips.
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVNClipV", "WriteVNClipX", "WriteVNClipI"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// 13. Vector Floating-Point Instructions
// SEW-aware FP classes: one entry per (LMUL, SEW) pair.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    // Latency-8 group.
    let Latency = 8,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    foreach wr = ["WriteVFALUV", "WriteVFALUF",
                  "WriteVFMulV", "WriteVFMulF",
                  "WriteVFMulAddV", "WriteVFMulAddF",
                  "WriteVFRecpV", "WriteVFCvtIToFV"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
    // Latency-4 group.
    let Latency = 4,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    foreach wr = ["WriteVFMinMaxV", "WriteVFMinMaxF",
                  "WriteVFSgnjV", "WriteVFSgnjF"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
}

// FP classes that are keyed on LMUL only.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;

  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVFClassV", "WriteVFMergeV", "WriteVFMovV"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;

  // Mask results can't chain.
  let Latency = !add(Cycles, 3),
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVFCmpV", "WriteVFCmpF"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// FP divide and square root: the cycle count is the per-SEW div/sqrt factor
// times a quarter of the one-per-element cycle count, and is used for both
// the latency and the VA occupancy.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
                         !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = Cycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    foreach wr = ["WriteVFSqrtV", "WriteVFDivV", "WriteVFDivF"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
}

// Widening
// Integer-to-FP widening conversion iterates the integer widening SEW set.
foreach mx = SchedMxListW in {
  foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
    let Latency = 8,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 8,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    foreach wr = ["WriteVFWALUV", "WriteVFWALUF",
                  "WriteVFWMulV", "WriteVFWMulF",
                  "WriteVFWMulAddV", "WriteVFWMulAddF",
                  "WriteVFWCvtFToFV"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
  // The FP-to-integer widening conversion is keyed on LMUL only.
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// Narrowing
foreach mx = SchedMxListW in {
  defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 8,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    foreach wr = ["WriteVFNCvtIToFV", "WriteVFNCvtFToFV"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
}

// 14. Vector Reduction Operations
// For every reduction class the reduction cycle count serves as both the
// latency and the VA occupancy.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = Cycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    foreach wr = ["WriteVIRedV_From", "WriteVIRedMinMaxV_From"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, IsWorstCase>;
  }
}

foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = Cycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = RedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in
    foreach wr = ["WriteVFRedV_From", "WriteVFRedMinMaxV_From"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA],
                                     mx, sew, IsWorstCase>;
    // Ordered reductions use their own cycle count.
    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrdRedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    let Latency = RedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, RedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
    // Ordered widening reduction.
    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
    let Latency = OrdRedCycles,
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, OrdRedCycles)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VCQ, SiFive7VA],
                                   mx, sew, IsWorstCase>;
  }
}

// 15. Vector Mask Instructions
// Mask-register operations take their occupancy from the VMask cycle table.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVMALUV", "WriteVMPopV", "WriteVMFFSV", "WriteVMSFSV"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
// viota/vid use the default cycle table.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVIotaV", "WriteVIdxV"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// 16. Vector Permutation Instructions
// Scalar <-> vector element moves have no LMUL dependence.
let Latency = 4,
    AcquireAtCycles = [0, 1],
    ReleaseAtCycles = [1, !add(1, 1)] in
foreach wr = [WriteVMovSX, WriteVMovXS, WriteVMovSF, WriteVMovFS] in
  def : WriteRes<wr, [SiFive7VCQ, SiFive7VA]>;

foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVRGatherVX", "WriteVRGatherVI"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// Vector-indexed gathers and compress use the one-per-element cycle count.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = !add(Cycles, 3),
        AcquireAtCycles = [0, 1],
        ReleaseAtCycles = [1, !add(1, Cycles)] in
    foreach wr = ["WriteVRGatherVV", "WriteVRGatherEI16VV", "WriteVCompressV"] in
      defm "" : LMULSEWWriteResMXSEW<wr, [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
  }
}

foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 4,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach wr = ["WriteVSlideUpX", "WriteVSlideDownX", "WriteVSlideI",
                "WriteVISlide1X", "WriteVFSlide1F"] in
    defm "" : LMULWriteResMX<wr, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}

// VMov*V is LMUL Aware
// Whole-register moves: the second ReleaseAtCycles operand is 2, 4, 8 and 16
// for the 1-, 2-, 4- and 8-register variants respectively.
let Latency = 4, AcquireAtCycles = [0, 1] in {
  let ReleaseAtCycles = [1, !add(1, 2)] in
  def : WriteRes<WriteVMov1V, [SiFive7VCQ, SiFive7VA]>;
  let ReleaseAtCycles = [1, !add(1, 4)] in
  def : WriteRes<WriteVMov2V, [SiFive7VCQ, SiFive7VA]>;
  let ReleaseAtCycles = [1, !add(1, 8)] in
  def : WriteRes<WriteVMov4V, [SiFive7VCQ, SiFive7VA]>;
  let ReleaseAtCycles = [1, !add(1, 16)] in
  def : WriteRes<WriteVMov8V, [SiFive7VCQ, SiFive7VA]>;
}

// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
def : WriteRes<WriteNop, []>;
let Latency = 3 in
def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;

// COPY is scheduled like an integer ALU operation.
def : InstRW<[WriteIALU], (instrs COPY)>;

// VCIX
//
// In principle we don't know the latency of any VCIX instruction (they
// depend on a particular coprocessor implementation). However, the default
// latency of 1 can lead to issues [1]. So instead we set the latency to the
// default provided by `SiFive7GetCyclesDefault`. This is still not accurate
// and can lead to suboptimal codegen, but should hopefully be a better
// starting point.
//
// [1] https://github.com/llvm/llvm-project/issues/83391
//
// The value-producing ("WriteVC_V_*") and non-value-producing ("WriteVC_*")
// families share the same operand-form suffixes, so both are generated from
// the same lists.
foreach mx = SchedMxList in {
  defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
  defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = Cycles,
      AcquireAtCycles = [0, 1],
      ReleaseAtCycles = [1, !add(1, Cycles)] in
  foreach family = ["WriteVC_V_", "WriteVC_"] in {
    // Integer / vector operand forms.
    foreach form = ["I", "X", "IV", "VV", "XV",
                    "IVV", "IVW", "VVV", "VVW", "XVV", "XVW"] in
      defm "" : LMULWriteResMX<family # form, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
    // Scalar floating-point operand forms.
    foreach f = ["FPR16", "FPR32", "FPR64"] in
      foreach form = ["V", "VV", "VW"] in
        defm "" : LMULWriteResMX<family # f # form, [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
  }
}

//===----------------------------------------------------------------------===//

// Bypass and advance
def : SiFive7AnyToGPRBypass<ReadJmp>;
def : SiFive7AnyToGPRBypass<ReadJalr>;
def : ReadAdvance<ReadCSR, 0>;
def : SiFive7AnyToGPRBypass<ReadStoreData>;
def : ReadAdvance<ReadMemBase, 0>;
foreach rd = [ReadIALU, ReadIALU32,
              ReadShiftImm, ReadShiftImm32,
              ReadShiftReg, ReadShiftReg32] in
  def : SiFive7AnyToGPRBypass<rd>;

// Everything below is given a read advance of 0 cycles.
foreach rd = [
    // Integer divide / remainder / multiply.
    ReadIDiv, ReadIDiv32, ReadIRem, ReadIRem32, ReadIMul, ReadIMul32,
    // Atomics.
    ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
    ReadAtomicLDW, ReadAtomicLDD, ReadAtomicSTW, ReadAtomicSTD,
    // Scalar FP memory operands.
    ReadFStoreData, ReadFMemBase,
    // Scalar FP arithmetic.
    ReadFAdd16, ReadFAdd32, ReadFAdd64,
    ReadFMul16, ReadFMA16, ReadFMA16Addend,
    ReadFMul32, ReadFMul64,
    ReadFMA32, ReadFMA32Addend,
    ReadFMA64, ReadFMA64Addend,
    ReadFDiv16, ReadFDiv32, ReadFDiv64,
    ReadFSqrt16, ReadFSqrt32, ReadFSqrt64,
    ReadFCmp16, ReadFCmp32, ReadFCmp64,
    ReadFSGNJ16, ReadFSGNJ32, ReadFSGNJ64,
    ReadFMinMax16, ReadFMinMax32, ReadFMinMax64,
    // Scalar FP conversions.
    ReadFCvtF16ToI32, ReadFCvtF16ToI64,
    ReadFCvtF32ToI32, ReadFCvtF32ToI64,
    ReadFCvtF64ToI32, ReadFCvtF64ToI64,
    ReadFCvtI32ToF16, ReadFCvtI32ToF32, ReadFCvtI32ToF64,
    ReadFCvtI64ToF16, ReadFCvtI64ToF32, ReadFCvtI64ToF64,
    ReadFCvtF32ToF64, ReadFCvtF64ToF32,
    ReadFCvtF16ToF32, ReadFCvtF32ToF16,
    ReadFCvtF16ToF64] in
  def : ReadAdvance<rd, 0>;
// Remaining scalar FP reads, all with a read advance of 0 cycles.
foreach rd = [ReadFCvtF64ToF16,
              ReadFMovF16ToI16, ReadFMovI16ToF16,
              ReadFMovF32ToI32, ReadFMovI32ToF32,
              ReadFMovF64ToI64, ReadFMovI64ToF64,
              ReadFClass16, ReadFClass32, ReadFClass64] in
  def : ReadAdvance<rd, 0>;

// Short-forward-branch reads pass an explicit 0 to the bypass class.
def : SiFive7AnyToGPRBypass<ReadSFBJmp, 0>;
def : SiFive7AnyToGPRBypass<ReadSFBALU, 0>;

// Bitmanip
foreach rd = [ReadRotateImm, ReadRotateImm32,
              ReadRotateReg, ReadRotateReg32,
              ReadCLZ, ReadCLZ32, ReadCTZ, ReadCTZ32] in
  def : SiFive7AnyToGPRBypass<rd>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
foreach rd = [ReadORCB, ReadIMinMax, ReadREV8, ReadSHXADD, ReadSHXADD32] in
  def : SiFive7AnyToGPRBypass<rd>;
// Single-bit instructions
def : SiFive7AnyToGPRBypass<ReadSingleBit>;
def : SiFive7AnyToGPRBypass<ReadSingleBitImm>;

// 6. Configuration-Setting Instructions
def : ReadAdvance<ReadVSETVLI, 2>;
def : ReadAdvance<ReadVSETVL, 2>;

// 7. Vector Loads and Stores
def : ReadAdvance<ReadVLDX, 0>;
def : ReadAdvance<ReadVSTX, 0>;
defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
defm "" : LMULReadAdvance<"ReadVSTM", 0>;
def : ReadAdvance<ReadVLDSX, 0>;
def : ReadAdvance<ReadVSTSX, 0>;
foreach rd = ["ReadVSTS8V", "ReadVSTS16V", "ReadVSTS32V", "ReadVSTS64V",
              "ReadVLDUXV", "ReadVLDOXV",
              "ReadVSTUX8", "ReadVSTUX16", "ReadVSTUX32", "ReadVSTUX64",
              "ReadVSTUXV",
              "ReadVSTUX8V", "ReadVSTUX16V", "ReadVSTUX32V", "ReadVSTUX64V",
              "ReadVSTOX8", "ReadVSTOX16", "ReadVSTOX32", "ReadVSTOX64",
              "ReadVSTOXV",
              "ReadVSTOX8V", "ReadVSTOX16V", "ReadVSTOX32V", "ReadVSTOX64V"] in
  defm "" : LMULReadAdvance<rd, 0>;
// LMUL Aware
foreach rd = [ReadVST1R, ReadVST2R, ReadVST4R, ReadVST8R] in
  def : ReadAdvance<rd, 0>;

// 11. Vector Integer Arithmetic Instructions
defm : LMULReadAdvance<"ReadVIALUV", 0>;
defm : LMULReadAdvance<"ReadVIALUX", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
foreach rd = ["ReadVExtV", "ReadVICALUV", "ReadVICALUX",
              "ReadVShiftV", "ReadVShiftX"] in
  defm : LMULReadAdvance<rd, 0>;
defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
foreach rd = ["ReadVICmpV", "ReadVICmpX",
              "ReadVIMinMaxV", "ReadVIMinMaxX",
              "ReadVIMulV", "ReadVIMulX"] in
  defm : LMULReadAdvance<rd, 0>;
defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
foreach rd = ["ReadVIMergeV", "ReadVIMergeX", "ReadVIMovV", "ReadVIMovX"] in
  defm : LMULReadAdvance<rd, 0>;

// 12. Vector Fixed-Point Arithmetic Instructions
foreach rd = ["ReadVSALUV", "ReadVSALUX",
              "ReadVAALUV", "ReadVAALUX",
              "ReadVSMulV", "ReadVSMulX",
              "ReadVSShiftV", "ReadVSShiftX"] in
  defm "" : LMULReadAdvance<rd, 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;

// 13. Vector Floating-Point Instructions
defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
foreach rd = ["ReadVFMulV", "ReadVFMulF", "ReadVFDivV", "ReadVFDivF"] in
  defm "" : LMULSEWReadAdvanceF<rd, 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
foreach rd = ["ReadVFSqrtV", "ReadVFRecpV",
              "ReadVFMinMaxV", "ReadVFMinMaxF",
              "ReadVFSgnjV", "ReadVFSgnjF"] in
  defm "" : LMULSEWReadAdvanceF<rd, 0>;
foreach rd = ["ReadVFCmpV", "ReadVFCmpF", "ReadVFClassV",
              "ReadVFMergeV", "ReadVFMergeF", "ReadVFMovF"] in
  defm "" : LMULReadAdvance<rd, 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;

// 14. Vector Reduction Operations
foreach rd = [ReadVIRedV, ReadVIRedV0,
              ReadVIWRedV, ReadVIWRedV0,
              ReadVFRedV, ReadVFRedV0,
              ReadVFRedOV, ReadVFRedOV0,
              ReadVFWRedV, ReadVFWRedV0,
              ReadVFWRedOV, ReadVFWRedOV0] in
  def : ReadAdvance<rd, 0>;

// 15. Vector Mask Instructions
foreach rd = ["ReadVMALUV", "ReadVMPopV", "ReadVMFFSV", "ReadVMSFSV",
              "ReadVIotaV"] in
  defm "" : LMULReadAdvance<rd, 0>;

// 16. Vector Permutation Instructions
foreach rd = [ReadVMovXS, ReadVMovSX_V, ReadVMovSX_X,
              ReadVMovFS, ReadVMovSF_V, ReadVMovSF_F] in
  def : ReadAdvance<rd, 0>;
foreach rd = ["ReadVISlideV", "ReadVISlideX", "ReadVFSlideV", "ReadVFSlideF"] in
  defm "" : LMULReadAdvance<rd, 0>;
foreach rd = ["ReadVRGatherVV_data", "ReadVRGatherVV_index",
              "ReadVRGatherEI16VV_data", "ReadVRGatherEI16VV_index"] in
  defm "" : LMULSEWReadAdvance<rd, 0>;
foreach rd = ["ReadVRGatherVX_data", "ReadVRGatherVX_index",
              "ReadVRGatherVI_data"] in
  defm "" : LMULReadAdvance<rd, 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// LMUL Aware
foreach rd = [ReadVMov1V, ReadVMov2V, ReadVMov4V, ReadVMov8V] in
  def : ReadAdvance<rd, 0>;

// Others
def : ReadAdvance<ReadVMask, 0>;
def : ReadAdvance<ReadVPassthru_WorstCase, 0>;
// Passthru reads exist per LMUL and per (LMUL, SEW); look each one up by name.
foreach mx = SchedMxList in {
  def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx), 0>;
  foreach sew = SchedSEWSet<mx>.val in {
    def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew), 0>;
  }
}

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZvk;
}