//==- RISCVSchedSiFiveP600.td - SiFiveP600 Scheduling Defs ---*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

/// c is true if mx has the worst case behavior compared to LMULs in MxList.
/// On the SiFiveP600, the worst case LMUL is the Largest LMUL
/// and the worst case sew is the smallest SEW for that LMUL.
class SiFiveP600IsWorstCaseMX<string mx, list<string> MxList> {
  string LLMUL = LargestLMUL<MxList>.r;
  bit c = !eq(mx, LLMUL);
}

class SiFiveP600IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
  string LLMUL = LargestLMUL<MxList>.r;
  int SSEW = SmallestSEW<mx, isF>.r;
  bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
}

// 1 Micro-Op per cycle.
class SiFiveP600GetLMulCycles<string mx> {
  int c = !cond(
    !eq(mx, "M1") : 1,
    !eq(mx, "M2") : 2,
    !eq(mx, "M4") : 4,
    !eq(mx, "M8") : 8,
    !eq(mx, "MF2") : 1,
    !eq(mx, "MF4") : 1,
    !eq(mx, "MF8") : 1
  );
}

// Latency for segmented loads and stores is calculated as vl * nf.
class SiFiveP600GetCyclesSegmented<string mx, int sew, int nf> {
  defvar VLEN = 128;
  defvar VLUpperBound = !cond(
    !eq(mx, "M1") : !div(VLEN, sew),
    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
  );
  int c = !mul(VLUpperBound, nf);
}
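
// Illustrative note (not from the upstream model): with the VLEN = 128
// assumption above, a segmented access at mx = M2, sew = 32, nf = 4 gets
// VLUpperBound = (128 * 2) / 32 = 8 and c = 8 * 4 = 32 occupied cycles.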

class SiFiveP600VSM3CCycles<string mx> {
  // c = ceil(LMUL / 2)
  int c = !cond(!eq(mx, "M2") : 1,
                !eq(mx, "M4") : 2,
                !eq(mx, "M8") : 4,
                true : 1);
}

class SiFiveP600RVVMultiplier<string mx> {
  int c = !if(!eq(mx, "M8"), 2, 1);
}

// ======================================================================
// The latency and occupancy data in this section are primarily evaluated
// from llvm-exegesis.
// ======================================================================

class SiFiveP600VCryptoLatency<string mx> {
  int c = !cond(
    !eq(mx, "M4"): 4,
    !eq(mx, "M8"): 8,
    true: 2
  );
}

class SiFiveP600VFMinMaxReduction<string mx, int sew> {
  defvar E64Lat = !cond(
    !eq(mx, "M1") : 4,
    !eq(mx, "M2") : 6,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 10,
    true: 2
  );

  defvar E64Cycles = !cond(
    !eq(mx, "M1") : 3,
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 5,
    !eq(mx, "M8") : 6,
    true: 2
  );

  int latency = !if(!eq(sew, 64), E64Lat, !add(E64Lat, 2));
  int cycles = !if(!eq(sew, 64), E64Cycles, !add(E64Cycles, 1));
}

class SiFiveP600VFUnorderedReduction<string mx, int sew> {
  defvar E64Lat = !cond(
    !eq(mx, "M1") : 6,
    !eq(mx, "M2") : 12,
    !eq(mx, "M4") : 18,
    !eq(mx, "M8") : 24,
    true: 2
  );

  defvar E32Cycles = !cond(
    !eq(mx, "M1") : 10,
    !eq(mx, "M2") : 10,
    !eq(mx, "M4") : 11,
    !eq(mx, "M8") : 11,
    true: 6
  );

  int latency = !if(!eq(sew, 64), E64Lat, !add(E64Lat, 4));
  int cycles = !if(!eq(sew, 64), 6, E32Cycles);
}

class SiFiveP600VFWidenUnorderedReduction<string mx> {
  int latency = !cond(
    !eq(mx, "M1") : 10,
    !eq(mx, "M2") : 18,
    !eq(mx, "M4") : 24,
    !eq(mx, "M8") : 30,
    true: 6
  );
}

class SiFiveP600VFOrderedReduction<string mx, int sew> {
  defvar Base = !if(!eq(sew, 64), 6, 10);
  int c = !cond(
    !eq(mx, "M1") : Base,
    !eq(mx, "M2") : !mul(Base, 2),
    !eq(mx, "M4") : !mul(Base, 4),
    !eq(mx, "M8") : !mul(Base, 8),
    true: 6
  );
}

class SiFiveP600VIReductionLatency<string mx> {
  int c = !cond(
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 8,
    !eq(mx, "M8") : 16,
    // M1 and lower
    true: 2
  );
}

class SiFiveP600VIMinMaxReductionLatency<string mx, int sew> {
  // +-----+-----+-----+-----+----+
  // |     | E64 | E32 | E16 | E8 |
  // +-----+-----+-----+-----+----+
  // | MF8 |  X  |  X  |  X  |  4 |
  // +-----+-----+-----+-----+----+
  // | MF4 |  X  |  X  |  4  |  6 |
  // +-----+-----+-----+-----+----+
  // | MF2 |  X  |  4  |  6  |  8 |
  // +-----+-----+-----+-----+----+
  // | M1  |  4  |  6  |  8  | 10 |
  // +-----+-----+-----+-----+----+
  // | M2  |  6  |  8  | 10  | 12 |
  // +-----+-----+-----+-----+----+
  // | M4  |  8  | 10  | 12  | 14 |
  // +-----+-----+-----+-----+----+
  // | M8  | 10  | 12  | 14  | 16 |
  // +-----+-----+-----+-----+----+
  defvar BaseIndex = !cond(
    !eq(sew, 64): 0,
    !eq(sew, 32): 1,
    !eq(sew, 16): 2,
    !eq(sew, 8): 3
  );

  defvar Latencies = [4, 6, 8, 10, 12, 14, 16];

  int c = !cond(
    !eq(mx, "M1") : Latencies[BaseIndex],
    !eq(mx, "M2") : Latencies[!add(BaseIndex, 1)],
    !eq(mx, "M4") : Latencies[!add(BaseIndex, 2)],
    !eq(mx, "M8") : Latencies[!add(BaseIndex, 3)],
    // Fractional
    !eq(mx, "MF2"): Latencies[!sub(BaseIndex, 1)],
    !eq(mx, "MF4"): Latencies[!sub(BaseIndex, 2)],
    !eq(mx, "MF8"): Latencies[!sub(BaseIndex, 3)],
  );
}
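
// Illustrative note (not from the upstream model): the table above is encoded
// by indexing into Latencies, e.g. mx = M4, sew = 32 gives BaseIndex = 1 and
// c = Latencies[1 + 2] = 10, matching the M4/E32 entry.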

class SiFiveP600VIMinMaxReductionCycles<string mx, int sew> {
  // +-----+-----+-----+-----+----+
  // |     | E64 | E32 | E16 | E8 |
  // +-----+-----+-----+-----+----+
  // | MF8 |  X  |  X  |  X  |  3 |
  // +-----+-----+-----+-----+----+
  // | MF4 |  X  |  X  |  3  |  5 |
  // +-----+-----+-----+-----+----+
  // | MF2 |  X  |  3  |  5  |  6 |
  // +-----+-----+-----+-----+----+
  // | M1  |  3  |  4  |  6  |  8 |
  // +-----+-----+-----+-----+----+
  // | M2  |  4  |  5  |  8  |  9 |
  // +-----+-----+-----+-----+----+
  // | M4  |  5  |  6  | 10  | 11 |
  // +-----+-----+-----+-----+----+
  // | M8  |  7  |  8  |  9  | 11 |
  // +-----+-----+-----+-----+----+
  defvar Index = !cond(
    !eq(sew, 64): 0,
    !eq(sew, 32): 1,
    !eq(sew, 16): 2,
    !eq(sew, 8): 3
  );

  defvar Cycles = [
    [0, 0, 0, 3],
    [0, 0, 3, 5],
    [0, 3, 5, 6],
    [3, 4, 6, 8],
    [4, 5, 8, 9],
    [5, 6, 10, 11],
    [7, 8, 9, 11]
  ];

  int c = !cond(
    !eq(mx, "MF8"): Cycles[0][Index],
    !eq(mx, "MF4"): Cycles[1][Index],
    !eq(mx, "MF2"): Cycles[2][Index],
    !eq(mx, "M1"): Cycles[3][Index],
    !eq(mx, "M2"): Cycles[4][Index],
    !eq(mx, "M4"): Cycles[5][Index],
    !eq(mx, "M8"): Cycles[6][Index],
  );
}

class SiFiveP600VSlide1<string mx> {
  int c = !cond(
    !eq(mx, "M2") : 3,
    !eq(mx, "M4") : 4,
    !eq(mx, "M8") : 8,
    // M1 and lower
    true: 2
  );
}

class SiFiveP600VSlideI<string mx> {
  int c = !cond(
    !eq(mx, "M2") : 4,
    !eq(mx, "M4") : 6,
    !eq(mx, "M8") : 8,
    // M1 and lower
    true: 2
  );
}

class SiFiveP600VSlideXComplex<string mx, bit isUp = false> {
  int latency = !cond(
    !eq(mx, "M2") : 11,
    !eq(mx, "M4") : 14,
    !eq(mx, "M8") : 20
  );

  int cycles = !cond(
    !eq(mx, "M2") : !if(isUp, 10, 11),
    !eq(mx, "M4") : !if(isUp, 12, 14),
    !eq(mx, "M8") : !if(isUp, 16, 20)
  );
}

class SiFiveP600VPermutationComplex<string mx> {
  int c = !cond(
    !eq(mx, "M2") : 12,
    !eq(mx, "M4") : 16,
    !eq(mx, "M8") : 24
  );
}

class SiFiveP600VSHA2MSCycles<string mx, int sew> {
  int c = !cond(
    !eq(mx, "M2") : !if(!eq(sew, 32), 2, 3),
    !eq(mx, "M4") : !if(!eq(sew, 32), 4, 6),
    !eq(mx, "M8") : !if(!eq(sew, 32), 8, 12),
    true: 1
  );
}

// SiFiveP600 machine model for scheduling and other instruction cost heuristics.
def SiFiveP600Model : SchedMachineModel {
  let IssueWidth = 4;           // 4 micro-ops are dispatched per cycle.
  let MicroOpBufferSize = 160;  // Max micro-ops that can be buffered.
  let LoadLatency = 4;          // Cycles for loads to access the cache.
  let MispredictPenalty = 9;    // Extra cycles for a mispredicted branch.
  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                             HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
                             HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
                             HasVendorXSfvqmaccqoq];
  let CompleteModel = false;
}

let SchedModel = SiFiveP600Model in {

def SiFiveP600IEXQ0 : ProcResource<1>;
def SiFiveP600IEXQ1 : ProcResource<1>;
def SiFiveP600IEXQ2 : ProcResource<1>;
def SiFiveP600IEXQ3 : ProcResource<1>;
def SiFiveP600FEXQ0 : ProcResource<1>;
def SiFiveP600FEXQ1 : ProcResource<1>;

// Two Load/Store ports that can issue either two loads, two stores, or one load
// and one store (P550 has one load and one separate store pipe).
def SiFiveP600LDST : ProcResource<2>;

// 4-wide pipeline with 4 ALU pipes.
def SiFiveP600IntArith : ProcResGroup<[SiFiveP600IEXQ0, SiFiveP600IEXQ1, SiFiveP600IEXQ2, SiFiveP600IEXQ3]>;
defvar SiFiveP600SYS = SiFiveP600IEXQ0;
defvar SiFiveP600CMOV = SiFiveP600IEXQ0;
defvar SiFiveP600MulI2F = SiFiveP600IEXQ1;
def SiFiveP600Branch : ProcResGroup<[SiFiveP600IEXQ2, SiFiveP600IEXQ3]>;
def SiFiveP600Div : ProcResource<1>;

def SiFiveP600FloatArith : ProcResGroup<[SiFiveP600FEXQ0, SiFiveP600FEXQ1]>;
defvar SiFiveP600F2I = SiFiveP600FEXQ0;
def SiFiveP600FloatDiv : ProcResource<1>;

// Vector pipeline
// VEXQ0 handles Mask and Simple Slide instructions,
// VEXQ1 handles Complex Slide, Permutation, Reduction, and Divide instructions.
// Other vector instructions can be done in VEXQ0 and VEXQ1.
def SiFiveP600VEXQ0 : ProcResource<1>;
def SiFiveP600VEXQ1 : ProcResource<1>;
def SiFiveP600VectorArith : ProcResGroup<[SiFiveP600VEXQ0, SiFiveP600VEXQ1]>;

// Only VEXQ0 has mask unit.
defvar SiFiveP600VectorMask = SiFiveP600VEXQ0;
// Only VEXQ0 has vector crypto.
defvar SiFiveP600VectorCrypto = SiFiveP600VEXQ0;

def SiFiveP600VLD : ProcResource<1>;
def SiFiveP600VST : ProcResource<1>;
def SiFiveP600VDiv : ProcResource<1>;
def SiFiveP600VFloatDiv : ProcResource<1>;

// Integer arithmetic and logic
def : WriteRes<WriteIALU, [SiFiveP600IntArith]>;
def : WriteRes<WriteIALU32, [SiFiveP600IntArith]>;
def : WriteRes<WriteShiftImm, [SiFiveP600IntArith]>;
def : WriteRes<WriteShiftImm32, [SiFiveP600IntArith]>;
def : WriteRes<WriteShiftReg, [SiFiveP600IntArith]>;
def : WriteRes<WriteShiftReg32, [SiFiveP600IntArith]>;
// Branching
def : WriteRes<WriteJmp, [SiFiveP600Branch]>;
def : WriteRes<WriteJal, [SiFiveP600Branch]>;
def : WriteRes<WriteJalr, [SiFiveP600Branch]>;

// CMOV
def P600WriteCMOV : SchedWriteRes<[SiFiveP600Branch, SiFiveP600CMOV]> {
  let Latency = 2;
  let NumMicroOps = 2;
}
def : InstRW<[P600WriteCMOV], (instrs PseudoCCMOVGPRNoX0)>;

let Latency = 2 in {
// Integer multiplication
def : WriteRes<WriteIMul, [SiFiveP600MulI2F]>;
def : WriteRes<WriteIMul32, [SiFiveP600MulI2F]>;
// cpop[w] look exactly like multiply.
def : WriteRes<WriteCPOP, [SiFiveP600MulI2F]>;
def : WriteRes<WriteCPOP32, [SiFiveP600MulI2F]>;
}

// Integer division
def : WriteRes<WriteIDiv, [SiFiveP600MulI2F, SiFiveP600Div]> {
  let Latency = 35;
  let ReleaseAtCycles = [1, 34];
}
def : WriteRes<WriteIDiv32, [SiFiveP600MulI2F, SiFiveP600Div]> {
  let Latency = 20;
  let ReleaseAtCycles = [1, 19];
}

// Integer remainder
def : WriteRes<WriteIRem, [SiFiveP600MulI2F, SiFiveP600Div]> {
  let Latency = 35;
  let ReleaseAtCycles = [1, 34];
}
def : WriteRes<WriteIRem32, [SiFiveP600MulI2F, SiFiveP600Div]> {
  let Latency = 20;
  let ReleaseAtCycles = [1, 19];
}
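
// Illustrative note (not from the upstream model): the divide/remainder
// entries above model a non-pipelined divider via ReleaseAtCycles, e.g.
// WriteIDiv keeps SiFiveP600Div reserved for 34 cycles, so back-to-back
// 64-bit divides can start at most once every ~34 cycles.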

// Bitmanip
def : WriteRes<WriteRotateImm, [SiFiveP600IntArith]>;
def : WriteRes<WriteRotateImm32, [SiFiveP600IntArith]>;
def : WriteRes<WriteRotateReg, [SiFiveP600IntArith]>;
def : WriteRes<WriteRotateReg32, [SiFiveP600IntArith]>;

def : WriteRes<WriteCLZ, [SiFiveP600IntArith]>;
def : WriteRes<WriteCLZ32, [SiFiveP600IntArith]>;
def : WriteRes<WriteCTZ, [SiFiveP600IntArith]>;
def : WriteRes<WriteCTZ32, [SiFiveP600IntArith]>;

def : WriteRes<WriteORCB, [SiFiveP600IntArith]>;
def : WriteRes<WriteIMinMax, [SiFiveP600IntArith]>;

def : WriteRes<WriteREV8, [SiFiveP600IntArith]>;

def : WriteRes<WriteSHXADD, [SiFiveP600IntArith]>;
def : WriteRes<WriteSHXADD32, [SiFiveP600IntArith]>;

def : WriteRes<WriteSingleBit, [SiFiveP600IntArith]>;
def : WriteRes<WriteSingleBitImm, [SiFiveP600IntArith]>;
def : WriteRes<WriteBEXT, [SiFiveP600IntArith]>;
def : WriteRes<WriteBEXTI, [SiFiveP600IntArith]>;

// Memory
def : WriteRes<WriteSTB, [SiFiveP600LDST]>;
def : WriteRes<WriteSTH, [SiFiveP600LDST]>;
def : WriteRes<WriteSTW, [SiFiveP600LDST]>;
def : WriteRes<WriteSTD, [SiFiveP600LDST]>;
def : WriteRes<WriteFST16, [SiFiveP600LDST]>;
def : WriteRes<WriteFST32, [SiFiveP600LDST]>;
def : WriteRes<WriteFST64, [SiFiveP600LDST]>;

let Latency = 4 in {
def : WriteRes<WriteLDB, [SiFiveP600LDST]>;
def : WriteRes<WriteLDH, [SiFiveP600LDST]>;
}
let Latency = 4 in {
def : WriteRes<WriteLDW, [SiFiveP600LDST]>;
def : WriteRes<WriteLDD, [SiFiveP600LDST]>;
}

let Latency = 5 in {
def : WriteRes<WriteFLD16, [SiFiveP600LDST]>;
def : WriteRes<WriteFLD32, [SiFiveP600LDST]>;
def : WriteRes<WriteFLD64, [SiFiveP600LDST]>;
}

// Atomic memory
let Latency = 3 in {
def : WriteRes<WriteAtomicSTW, [SiFiveP600LDST]>;
def : WriteRes<WriteAtomicSTD, [SiFiveP600LDST]>;
def : WriteRes<WriteAtomicW, [SiFiveP600LDST]>;
def : WriteRes<WriteAtomicD, [SiFiveP600LDST]>;
def : WriteRes<WriteAtomicLDW, [SiFiveP600LDST]>;
def : WriteRes<WriteAtomicLDD, [SiFiveP600LDST]>;
}

// Floating point
let Latency = 2 in {
def : WriteRes<WriteFAdd16, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFAdd32, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFAdd64, [SiFiveP600FloatArith]>;
}
let Latency = 3 in {
def : WriteRes<WriteFMul16, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFMul32, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFMul64, [SiFiveP600FloatArith]>;
}
let Latency = 4 in {
def : WriteRes<WriteFMA16, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFMA32, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFMA64, [SiFiveP600FloatArith]>;
}

let Latency = 2 in {
def : WriteRes<WriteFSGNJ16, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFSGNJ32, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFSGNJ64, [SiFiveP600FloatArith]>;

def : WriteRes<WriteFMinMax16, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFMinMax32, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFMinMax64, [SiFiveP600FloatArith]>;
}

// Half precision.
def : WriteRes<WriteFDiv16, [SiFiveP600FEXQ1, SiFiveP600FloatDiv]> {
  let Latency = 4;
  let ReleaseAtCycles = [1, 4];
}
def : WriteRes<WriteFSqrt16, [SiFiveP600FEXQ1, SiFiveP600FloatDiv]> {
  let Latency = 18;
  let ReleaseAtCycles = [1, 17];
}

// Single precision.
def : WriteRes<WriteFDiv32, [SiFiveP600FEXQ1, SiFiveP600FloatDiv]> {
  let Latency = 6;
  let ReleaseAtCycles = [1, 6];
}
def : WriteRes<WriteFSqrt32, [SiFiveP600FEXQ1, SiFiveP600FloatDiv]> {
  let Latency = 18;
  let ReleaseAtCycles = [1, 17];
}

// Double precision
def : WriteRes<WriteFDiv64, [SiFiveP600FEXQ1, SiFiveP600FloatDiv]> {
  let Latency = 11;
  let ReleaseAtCycles = [1, 11];
}
def : WriteRes<WriteFSqrt64, [SiFiveP600FEXQ1, SiFiveP600FloatDiv]> {
  let Latency = 33;
  let ReleaseAtCycles = [1, 32];
}

// Conversions
let Latency = 2 in {
def : WriteRes<WriteFCvtI32ToF16, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFCvtI32ToF32, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFCvtI32ToF64, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFCvtI64ToF16, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFCvtI64ToF32, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFCvtI64ToF64, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFCvtF16ToI32, [SiFiveP600F2I]>;
def : WriteRes<WriteFCvtF16ToI64, [SiFiveP600F2I]>;
def : WriteRes<WriteFCvtF16ToF32, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFCvtF16ToF64, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFCvtF32ToI32, [SiFiveP600F2I]>;
def : WriteRes<WriteFCvtF32ToI64, [SiFiveP600F2I]>;
def : WriteRes<WriteFCvtF32ToF16, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFCvtF32ToF64, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFCvtF64ToI32, [SiFiveP600F2I]>;
def : WriteRes<WriteFCvtF64ToI64, [SiFiveP600F2I]>;
def : WriteRes<WriteFCvtF64ToF16, [SiFiveP600FloatArith]>;
def : WriteRes<WriteFCvtF64ToF32, [SiFiveP600FloatArith]>;

def : WriteRes<WriteFClass16, [SiFiveP600F2I]>;
def : WriteRes<WriteFClass32, [SiFiveP600F2I]>;
def : WriteRes<WriteFClass64, [SiFiveP600F2I]>;
def : WriteRes<WriteFCmp16, [SiFiveP600F2I]>;
def : WriteRes<WriteFCmp32, [SiFiveP600F2I]>;
def : WriteRes<WriteFCmp64, [SiFiveP600F2I]>;
def : WriteRes<WriteFMovI16ToF16, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFMovF16ToI16, [SiFiveP600F2I]>;
def : WriteRes<WriteFMovI32ToF32, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFMovF32ToI32, [SiFiveP600F2I]>;
def : WriteRes<WriteFMovI64ToF64, [SiFiveP600MulI2F]>;
def : WriteRes<WriteFMovF64ToI64, [SiFiveP600F2I]>;
}

// 6. Configuration-Setting Instructions
def : WriteRes<WriteVSETVLI, [SiFiveP600SYS]>;
def : WriteRes<WriteVSETIVLI, [SiFiveP600SYS]>;
def : WriteRes<WriteVSETVL, [SiFiveP600SYS]>;

// 7. Vector Loads and Stores
// FIXME: This unit is still being improved, currently
// it is based on stage numbers. Estimates are optimistic,
// latency may be longer.
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVLDE", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDM", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDFF", [SiFiveP600VLD], mx, IsWorstCase>;
  }
  let Latency = 12, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVLDS8", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDS16", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDS32", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDS64", [SiFiveP600VLD], mx, IsWorstCase>;
  }
  let Latency = 12, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFiveP600VLD], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFiveP600VLD], mx, IsWorstCase>;
  }
}

foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 8, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSTE", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTM", [SiFiveP600VST], mx, IsWorstCase>;
  }
  let Latency = 12, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSTS8", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTS16", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTS32", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTS64", [SiFiveP600VST], mx, IsWorstCase>;
  }
  let Latency = 12, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFiveP600VST], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFiveP600VST], mx, IsWorstCase>;
  }
}
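
// Illustrative reading of the entries above (not from the upstream model):
// at mx = M4 SiFiveP600GetLMulCycles gives 4, so e.g. a unit-stride vector
// store reserves SiFiveP600VST for 4 cycles (ReleaseAtCycles) and carries an
// 8-cycle latency in the model.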

foreach mx = SchedMxList in {
  foreach nf=2-8 in {
    foreach eew = [8, 16, 32, 64] in {
      defvar LMulLat = SiFiveP600GetCyclesSegmented<mx, eew, nf>.c;
      defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
      let Latency = !add(12, LMulLat), ReleaseAtCycles = [!add(12, LMulLat)] in {
        defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFiveP600VLD], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFiveP600VLD], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFiveP600VLD], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFiveP600VLD], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFiveP600VLD], mx, IsWorstCase>;
      }
      let Latency = !add(1, LMulLat), ReleaseAtCycles = [!add(12, LMulLat)] in {
        defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFiveP600VST], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFiveP600VST], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFiveP600VST], mx, IsWorstCase>;
        defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFiveP600VST], mx, IsWorstCase>;
      }
    }
  }
}

// Whole register move/load/store
foreach LMul = [1, 2, 4, 8] in {
  let Latency = 8, ReleaseAtCycles = [LMul] in {
    def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SiFiveP600VLD]>;
    def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SiFiveP600VST]>;
  }
  let Latency = 2, ReleaseAtCycles = [LMul] in {
    def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SiFiveP600VectorArith]>;
  }
}

// 11. Vector Integer Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVExtV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP600VectorMask], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP600VectorMask], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP600VectorMask], mx, IsWorstCase>;
  }
  let ReleaseAtCycles = [LMulLat] in {
    let Latency = 6 in {
      defm "" : LMULWriteResMX<"WriteVIMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    }

    let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c) in {
      defm "" : LMULWriteResMX<"WriteVIALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorMask], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorMask], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorMask], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMovX", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMovI", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFiveP600VectorArith], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVIMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    }
  }
}
// Widening
foreach mx = SchedMxListW in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 6, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>;

    // Special case for variants with widened operands.
    let ReleaseAtCycles = [!mul(LMulLat, 2)] in
    def P600WriteVIWALUWidenOp_ # mx : SchedWriteRes<[SiFiveP600VectorArith]>;
  }

  defvar P600VIWALUBaseSchedRW = [!cast<SchedWrite>("P600WriteVIWALUWidenOp_" # mx),
                                  !cast<SchedRead>("ReadVPassthru_" # mx),
                                  !cast<SchedRead>("ReadVIALUV_" # mx),
                                  !cast<SchedRead>("ReadVIALUV_" # mx)];

  def : InstRW<P600VIWALUBaseSchedRW,
               (instregex "^PseudoVW(ADD|SUB)[U]?_W(V|X)_" # mx # "$")>;
  def : InstRW<P600VIWALUBaseSchedRW[0,2,3],
               (instregex "^PseudoVW(ADD|SUB)[U]?_WV_" # mx # "_TIED$")>;

  def : InstRW<!listconcat(P600VIWALUBaseSchedRW, [!cast<SchedRead>("ReadVMask")]),
               (instregex "^PseudoVW(ADD|SUB)[U]?_W(V|X)_" # mx # "_MASK$")>;
  def : InstRW<!listconcat(P600VIWALUBaseSchedRW[0,1,3], [!cast<SchedRead>("ReadVMask")]),
               (instregex "^PseudoVW(ADD|SUB)[U]?_WV_" # mx # "_MASK_TIED$")>;
}

// Worst case needs 51/45/42/72 * lmul cycles for i8/16/32/64.
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    defvar DivMicroOpLat =
      !cond(!eq(sew, 8): 51, !eq(sew, 16): 45, !eq(sew, 32): 42,
            /* SEW=64 */ true: 72);
    defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
    let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFiveP600VEXQ1, SiFiveP600VDiv], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFiveP600VEXQ1, SiFiveP600VDiv], mx, sew, IsWorstCase>;
    }
  }
}
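
// Illustrative note (not from the upstream model): e.g. a SEW=64 divide at
// mx = M2 gets DivLatency = 72 * 2 = 144, i.e. a 144-cycle latency with
// SiFiveP600VDiv reserved for 144 cycles and SiFiveP600VEXQ1 for 2 cycles.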

// Narrowing Shift and Clips
foreach mx = SchedMxListW in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVNClipI", [SiFiveP600VectorArith], mx, IsWorstCase>;
  }
}

// 12. Vector Fixed-Point Arithmetic Instructions
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 6, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVAALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>;
  }
}

// 13. Vector Floating-Point Instructions
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    let Latency = 6, ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
    }
  }
}
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, isF=1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
    let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c), ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
    }
    let Latency = !if(!eq(mx, "M8"), 4, 3), ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
  }
}
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = !if(!eq(mx, "M8"), 4, 3), ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in
  defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
  let Latency = 2, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorMask], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorMask], mx, IsWorstCase>;
  }
  let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c),
      ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in {
    defm "" : LMULWriteResMX<"WriteVFClassV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVFMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
  }
}

// Widening
foreach mx = SchedMxListW in {
  foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
    let Latency = 3, ReleaseAtCycles = [LMulLat] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c;
  let Latency = 6, ReleaseAtCycles = [LMulLat] in
  defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 6, ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;

      // Special case for variants with widened operands.
      let ReleaseAtCycles = [!mul(LMulLat, 2)] in
      def P600WriteVFWALUWidenOp_ # mx # _E # sew : SchedWriteRes<[SiFiveP600VectorArith]>;
    }

    defvar P600VFWALUBaseSchedRW = [!cast<SchedWrite>("P600WriteVFWALUWidenOp_" # mx # "_E" # sew),
                                    !cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew),
                                    !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)];

    def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)]),
                 (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "$")>;
    def : InstRW<[P600VFWALUBaseSchedRW[0], P600VFWALUBaseSchedRW[2], !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)],
                 (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_TIED$")>;

    def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUF_" # mx # "_E" # sew)]),
                 (instregex "^PseudoVFW(ADD|SUB)_WFPR" # sew # "_" # mx # "_E" # sew # "$")>;

    def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")]),
                 (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_MASK$")>;
    def : InstRW<[P600VFWALUBaseSchedRW[0], P600VFWALUBaseSchedRW[1], !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")],
                 (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_MASK_TIED$")>;

    def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUF_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")]),
                 (instregex "^PseudoVFW(ADD|SUB)_WFPR" # sew # "_" # mx # "_E" # sew # "_MASK$")>;
  }
}
// Narrowing
foreach mx = SchedMxListW in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListW>.c;
  let Latency = 3, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
  }
}
foreach mx = SchedMxListFW in {
  foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
    let Latency = 3, ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
    }
  }
}

// Worst case needs around 29/25/37 * LMUL cycles for f16/32/64.
foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    defvar DivMicroOpLat =
      !cond(!eq(sew, 16): 29, !eq(sew, 32): 25, /* SEW=64 */ true: 37);
    defvar DivLatency = !mul(DivMicroOpLat, LMulLat);
    let Latency = DivLatency, ReleaseAtCycles = [LMulLat, DivLatency] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFiveP600VEXQ1, SiFiveP600VFloatDiv], mx, sew, IsWorstCase>;
    }
  }
}

// 14. Vector Reduction Operations
foreach mx = SchedMxList in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;

    let ReleaseAtCycles = [LMulLat] in {
      let Latency = SiFiveP600VIReductionLatency<mx>.c in
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFiveP600VEXQ1],
                                     mx, sew, IsWorstCase>;

      let Latency = SiFiveP600VIMinMaxReductionLatency<mx, sew>.c,
          ReleaseAtCycles = [SiFiveP600VIMinMaxReductionCycles<mx, sew>.c] in
      defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFiveP600VEXQ1],
                                     mx, sew, IsWorstCase>;
    }
  }
}

foreach mx = SchedMxListWRed in {
  foreach sew = SchedSEWSet<mx, 0, 1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
    let Latency = SiFiveP600VIReductionLatency<mx>.c, ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFiveP600VEXQ1],
                                     mx, sew, IsWorstCase>;
    }
  }
}

foreach mx = SchedMxListF in {
  foreach sew = SchedSEWSet<mx, 1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
    let Latency = SiFiveP600VFMinMaxReduction<mx, sew>.latency,
        ReleaseAtCycles = [SiFiveP600VFMinMaxReduction<mx, sew>.cycles] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From",
                                   [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;

    let Latency = SiFiveP600VFUnorderedReduction<mx, sew>.latency,
        ReleaseAtCycles = [SiFiveP600VFUnorderedReduction<mx, sew>.cycles] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFiveP600VEXQ1],
                                   mx, sew, IsWorstCase>;

    let Latency = SiFiveP600VFOrderedReduction<mx, sew>.c,
        ReleaseAtCycles = [SiFiveP600VFOrderedReduction<mx, sew>.c] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFiveP600VEXQ1],
                                   mx, sew, IsWorstCase>;
  }
}
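
// Illustrative note (not from the upstream model): ordered FP reductions above
// scale linearly with LMUL, e.g. sew = 32 at mx = M4 gives
// SiFiveP600VFOrderedReduction.c = 10 * 4 = 40 cycles of both latency and
// SiFiveP600VEXQ1 occupancy.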

foreach mx = SchedMxListFWRed in {
  foreach sew = SchedSEWSet<mx, 1, 1>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
    let Latency = SiFiveP600VFWidenUnorderedReduction<mx>.latency,
        ReleaseAtCycles = [6] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFiveP600VEXQ1],
                                   mx, sew, IsWorstCase>;

    let Latency = SiFiveP600VFOrderedReduction<mx, sew>.c,
        ReleaseAtCycles = [SiFiveP600VFOrderedReduction<mx, sew>.c] in
    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFiveP600VEXQ1],
                                   mx, sew, IsWorstCase>;
  }
}

// 15. Vector Mask Instructions
foreach mx = SchedMxList in {
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 2 in {
    defm "" : LMULWriteResMX<"WriteVMALUV", [SiFiveP600VectorMask], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFiveP600VectorMask], mx, IsWorstCase>;

    let ReleaseAtCycles = [2] in {
      defm "" : LMULWriteResMX<"WriteVMPopV", [SiFiveP600VectorMask], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFiveP600VectorMask], mx, IsWorstCase>;
    }
  }
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  let ReleaseAtCycles = [LMulLat] in {
    let Latency = 2 in
    defm "" : LMULWriteResMX<"WriteVIotaV", [SiFiveP600VectorMask], mx, IsWorstCase>;

    // vid.v isn't executed by the mask unit.
    let Latency = !if(!eq(mx, "M8"), 4, !if(!eq(mx, "M4"), 2, 1)) in
    defm "" : LMULWriteResMX<"WriteVIdxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
  }
}

// 16. Vector Permutation Instructions
// Simple Slide
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let ReleaseAtCycles = [LMulLat] in {
    let Latency = SiFiveP600VSlideI<mx>.c in
    defm "" : LMULWriteResMX<"WriteVSlideI", [SiFiveP600VEXQ0], mx, IsWorstCase>;

    let Latency = SiFiveP600VSlide1<mx>.c in {
      defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFiveP600VEXQ0], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFiveP600VEXQ0], mx, IsWorstCase>;
    }
  }
}
foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 2, ReleaseAtCycles = [1] in {
    defm "" : LMULWriteResMX<"WriteVSlideUpX", [SiFiveP600VEXQ0], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSlideDownX", [SiFiveP600VEXQ0], mx, IsWorstCase>;
  }
}

// Complex Slide
foreach mx = ["M8", "M4", "M2"] in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = SiFiveP600VSlideXComplex<mx>.latency in {
    let ReleaseAtCycles = [SiFiveP600VSlideXComplex<mx, /*isUp=*/true>.cycles] in
    defm "" : LMULWriteResMX<"WriteVSlideUpX", [SiFiveP600VEXQ1], mx, IsWorstCase>;
    let ReleaseAtCycles = [SiFiveP600VSlideXComplex<mx, /*isUp=*/false>.cycles] in
    defm "" : LMULWriteResMX<"WriteVSlideDownX", [SiFiveP600VEXQ1], mx, IsWorstCase>;
  }
}
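
// Illustrative note (not from the upstream model): per SiFiveP600VSlideXComplex,
// a vslideup.vx at M8 is modeled with a 20-cycle latency and 16 cycles of
// SiFiveP600VEXQ1 occupancy, while vslidedown.vx at M8 occupies VEXQ1 for the
// full 20 cycles.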

let Latency = 2, ReleaseAtCycles = [2] in {
  def : WriteRes<WriteVMovXS, [SiFiveP600VectorArith]>;
  def : WriteRes<WriteVMovSX, [SiFiveP600VectorArith]>;
  def : WriteRes<WriteVMovFS, [SiFiveP600VectorArith]>;
  def : WriteRes<WriteVMovSF, [SiFiveP600VectorArith]>;
}

// Simple Gather and Compress
foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 3, ReleaseAtCycles = [1] in {
    defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFiveP600VEXQ1], mx, IsWorstCase>;
  }
}

foreach mx = ["MF8", "MF4", "MF2", "M1"] in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
    let Latency = 3, ReleaseAtCycles = [1] in {
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
      defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
    }
  }
}

// Complex Gather and Compress
foreach mx = ["M2", "M4", "M8"] in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 6, ReleaseAtCycles = [SiFiveP600VPermutationComplex<mx>.c] in {
    defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFiveP600VEXQ1], mx, IsWorstCase>;
  }
}

foreach mx = ["M2", "M4", "M8"] in {
  foreach sew = SchedSEWSet<mx>.val in {
    defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
    defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
    let Latency = 6 in {
      let ReleaseAtCycles = [SiFiveP600VPermutationComplex<mx>.c] in {
        defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
        defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
      }

      let ReleaseAtCycles = [!add(SiFiveP600VPermutationComplex<mx>.c, 1)] in
      defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFiveP600VEXQ1], mx, sew, IsWorstCase>;
    }
  }
}

// Simple Vrgather.vi
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  let Latency = 3, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFiveP600VEXQ1], mx, IsWorstCase>;
  }
}

// Vector Crypto
foreach mx = SchedMxList in {
  defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
  defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
  // Zvbb
  let ReleaseAtCycles = [LMulLat] in {
    let Latency = 2 in {
      // FIXME: Exegesis was not able to measure the latency of these instructions.
      // We probably should update them at some point.
      defm "" : LMULWriteResMX<"WriteVCPOPV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVWSLLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVWSLLX", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVWSLLI", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    }

    let Latency = SiFiveP600VCryptoLatency<mx>.c in {
      defm "" : LMULWriteResMX<"WriteVBREVV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVCLZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVCTZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;

      def P600WriteVANDN_ # mx : SchedWriteRes<[SiFiveP600VectorCrypto]>;
    }
  }

  // Special case for VANDN -- we execute it on the vector crypto unit.
  defvar P600VANDNBaseSchedRW = [!cast<SchedWrite>("P600WriteVANDN_" # mx),
                                 // VANDN always merges its read operand.
                                 !cast<SchedRead>("ReadVPassthru_" # mx),
                                 !cast<SchedRead>("ReadVIALUV_" # mx),
                                 !cast<SchedRead>("ReadVIALUV_" # mx)];
  def : InstRW<P600VANDNBaseSchedRW,
               (instregex "^PseudoVANDN_V(V|X)_" # mx # "$")>;
  def : InstRW<!listconcat(P600VANDNBaseSchedRW, [!cast<SchedRead>("ReadVMask")]),
               (instregex "^PseudoVANDN_V(V|X)_" # mx # "_MASK$")>;

  // Zvbc
  let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVCLMULV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVCLMULX", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
  }
  // Zvkb
  // VANDN uses WriteVIALU[V|X|I]
  let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVBREV8V", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVREV8V", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVRotV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVRotX", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVRotI", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
  }
  // Zvkg
  let Latency = SiFiveP600VCryptoLatency<mx>.c, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVGHSHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVGMULV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
  }
  // ZvknhaOrZvknhb
  // FIXME: The latency is probably wrong.
  let Latency = 3, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defvar ZvknhSEWs = !listremove(SchedSEWSet<mx>.val, [8, 16]);
    // Largest SEW is the last element, assuming SchedSEWSet is sorted in ascending
    // order.
    defvar LargestZvknhSEW = !foldl(!head(ZvknhSEWs), ZvknhSEWs, last, curr, curr);
    foreach sew = ZvknhSEWs in {
      // The worst case for Zvknh[ab] is assigned to the largest SEW and LMUL.
      defvar IsWorstCaseVSHA2MSV = !and(IsWorstCase, !eq(sew, LargestZvknhSEW));
      let ReleaseAtCycles = [SiFiveP600VSHA2MSCycles<mx, sew>.c] in
      defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorCrypto], mx, sew,
                                     IsWorstCaseVSHA2MSV>;
    }
  }
  // Zvkned
  let Latency = 2 in {
    let ReleaseAtCycles = [LMulLat] in {
      defm "" : LMULWriteResMX<"WriteVAESMVV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVAESKF1V", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
      defm "" : LMULWriteResMX<"WriteVAESKF2V", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    }

    let ReleaseAtCycles = [!if(!lt(LMulLat, 2), LMulLat, !div(LMulLat, 2))] in
    defm "" : LMULWriteResMX<"WriteVAESZV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
  }
  // Zvksed
  let Latency = 3, ReleaseAtCycles = [SiFiveP600VSM3CCycles<mx>.c] in
  defm "" : LMULWriteResMX<"WriteVSM3CV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
  let Latency = 6, ReleaseAtCycles = [LMulLat] in
  defm "" : LMULWriteResMX<"WriteVSM3MEV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
  let Latency = 3, ReleaseAtCycles = [LMulLat] in {
    defm "" : LMULWriteResMX<"WriteVSM4KV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
    defm "" : LMULWriteResMX<"WriteVSM4RV", [SiFiveP600VectorCrypto], mx, IsWorstCase>;
  }
}

// Others
def : WriteRes<WriteCSR, [SiFiveP600SYS]>;
def : WriteRes<WriteNop, []>;
def : WriteRes<WriteRdVLENB, [SiFiveP600SYS]>;

// FIXME: This could be better modeled by looking at the regclasses of the operands.
def : InstRW<[WriteIALU, ReadIALU], (instrs COPY)>;

//===----------------------------------------------------------------------===//
// Bypass and advance
def : ReadAdvance<ReadJmp, 0>;
def : ReadAdvance<ReadJalr, 0>;
def : ReadAdvance<ReadCSR, 0>;
def : ReadAdvance<ReadStoreData, 0>;
def : ReadAdvance<ReadMemBase, 0>;
def : ReadAdvance<ReadIALU, 0>;
def : ReadAdvance<ReadIALU32, 0>;
def : ReadAdvance<ReadShiftImm, 0>;
def : ReadAdvance<ReadShiftImm32, 0>;
def : ReadAdvance<ReadShiftReg, 0>;
def : ReadAdvance<ReadShiftReg32, 0>;
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIRem, 0>;
def : ReadAdvance<ReadIRem32, 0>;
def : ReadAdvance<ReadIMul, 0>;
def : ReadAdvance<ReadIMul32, 0>;
def : ReadAdvance<ReadAtomicWA, 0>;
def : ReadAdvance<ReadAtomicWD, 0>;
def : ReadAdvance<ReadAtomicDA, 0>;
def : ReadAdvance<ReadAtomicDD, 0>;
def : ReadAdvance<ReadAtomicLDW, 0>;
def : ReadAdvance<ReadAtomicLDD, 0>;
def : ReadAdvance<ReadAtomicSTW, 0>;
def : ReadAdvance<ReadAtomicSTD, 0>;
def : ReadAdvance<ReadFStoreData, 0>;
def : ReadAdvance<ReadFMemBase, 0>;
def : ReadAdvance<ReadFAdd16, 0>;
def : ReadAdvance<ReadFAdd32, 0>;
def : ReadAdvance<ReadFAdd64, 0>;
def : ReadAdvance<ReadFMul16, 0>;
def : ReadAdvance<ReadFMA16, 0>;
def : ReadAdvance<ReadFMA16Addend, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMA32, 0>;
def : ReadAdvance<ReadFMA32Addend, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA64, 0>;
def : ReadAdvance<ReadFMA64Addend, 0>;
def : ReadAdvance<ReadFDiv16, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
def : ReadAdvance<ReadFSqrt16, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
def : ReadAdvance<ReadFSqrt64, 0>;
def : ReadAdvance<ReadFCmp16, 0>;
def : ReadAdvance<ReadFCmp32, 0>;
def : ReadAdvance<ReadFCmp64, 0>;
def : ReadAdvance<ReadFSGNJ16, 0>;
def : ReadAdvance<ReadFSGNJ32, 0>;
def : ReadAdvance<ReadFSGNJ64, 0>;
def : ReadAdvance<ReadFMinMax16, 0>;
def : ReadAdvance<ReadFMinMax32, 0>;
def : ReadAdvance<ReadFMinMax64, 0>;
def : ReadAdvance<ReadFCvtF16ToI32, 0>;
def : ReadAdvance<ReadFCvtF16ToI64, 0>;
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
def : ReadAdvance<ReadFCvtF64ToI32, 0>;
def : ReadAdvance<ReadFCvtF64ToI64, 0>;
def : ReadAdvance<ReadFCvtI32ToF16, 0>;
def : ReadAdvance<ReadFCvtI32ToF32, 0>;
def : ReadAdvance<ReadFCvtI32ToF64, 0>;
def : ReadAdvance<ReadFCvtI64ToF16, 0>;
def : ReadAdvance<ReadFCvtI64ToF32, 0>;
def : ReadAdvance<ReadFCvtI64ToF64, 0>;
def : ReadAdvance<ReadFCvtF32ToF64, 0>;
def : ReadAdvance<ReadFCvtF64ToF32, 0>;
def : ReadAdvance<ReadFCvtF16ToF32, 0>;
def : ReadAdvance<ReadFCvtF32ToF16, 0>;
def : ReadAdvance<ReadFCvtF16ToF64, 0>;
def : ReadAdvance<ReadFCvtF64ToF16, 0>;
def : ReadAdvance<ReadFMovF16ToI16, 0>;
def : ReadAdvance<ReadFMovI16ToF16, 0>;
def : ReadAdvance<ReadFMovF32ToI32, 0>;
def : ReadAdvance<ReadFMovI32ToF32, 0>;
def : ReadAdvance<ReadFMovF64ToI64, 0>;
def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass16, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;

// Bitmanip
def : ReadAdvance<ReadRotateImm, 0>;
def : ReadAdvance<ReadRotateImm32, 0>;
def : ReadAdvance<ReadRotateReg, 0>;
def : ReadAdvance<ReadRotateReg32, 0>;
def : ReadAdvance<ReadCLZ, 0>;
def : ReadAdvance<ReadCLZ32, 0>;
def : ReadAdvance<ReadCTZ, 0>;
def : ReadAdvance<ReadCTZ32, 0>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
def : ReadAdvance<ReadORCB, 0>;
def : ReadAdvance<ReadIMinMax, 0>;
def : ReadAdvance<ReadREV8, 0>;
def : ReadAdvance<ReadSHXADD, 0>;
def : ReadAdvance<ReadSHXADD32, 0>;
def : ReadAdvance<ReadSingleBit, 0>;
def : ReadAdvance<ReadSingleBitImm, 0>;

// 6. Configuration-Setting Instructions
def : ReadAdvance<ReadVSETVLI, 0>;
def : ReadAdvance<ReadVSETVL, 0>;

// 7. Vector Loads and Stores
def : ReadAdvance<ReadVLDX, 0>;
def : ReadAdvance<ReadVSTX, 0>;
defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
defm "" : LMULReadAdvance<"ReadVSTM", 0>;
def : ReadAdvance<ReadVLDSX, 0>;
def : ReadAdvance<ReadVSTSX, 0>;
defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
// LMUL Aware
def : ReadAdvance<ReadVST1R, 0>;
def : ReadAdvance<ReadVST2R, 0>;
def : ReadAdvance<ReadVST4R, 0>;
def : ReadAdvance<ReadVST8R, 0>;

// 12. Vector Integer Arithmetic Instructions
defm : LMULReadAdvance<"ReadVIALUV", 0>;
defm : LMULReadAdvance<"ReadVIALUX", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
defm : LMULReadAdvance<"ReadVExtV", 0>;
defm : LMULReadAdvance<"ReadVICALUV", 0>;
defm : LMULReadAdvance<"ReadVICALUX", 0>;
defm : LMULReadAdvance<"ReadVShiftV", 0>;
defm : LMULReadAdvance<"ReadVShiftX", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
defm : LMULReadAdvance<"ReadVICmpV", 0>;
defm : LMULReadAdvance<"ReadVICmpX", 0>;
defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
defm : LMULReadAdvance<"ReadVIMulV", 0>;
defm : LMULReadAdvance<"ReadVIMulX", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
defm : LMULReadAdvance<"ReadVIMergeV", 0>;
defm : LMULReadAdvance<"ReadVIMergeX", 0>;
defm : LMULReadAdvance<"ReadVIMovV", 0>;
defm : LMULReadAdvance<"ReadVIMovX", 0>;

// 13. Vector Fixed-Point Arithmetic Instructions
// 13. Vector Fixed-Point Arithmetic Instructions
defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;

// 14. Vector Floating-Point Instructions
defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMinMaxF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSgnjF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFCvtIToFV", 0>;
defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceW<"ReadVFWCvtIToFV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>;

// 15. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
def : ReadAdvance<ReadVIRedV0, 0>;
def : ReadAdvance<ReadVIWRedV, 0>;
def : ReadAdvance<ReadVIWRedV0, 0>;
def : ReadAdvance<ReadVFRedV, 0>;
def : ReadAdvance<ReadVFRedV0, 0>;
def : ReadAdvance<ReadVFRedOV, 0>;
def : ReadAdvance<ReadVFRedOV0, 0>;
def : ReadAdvance<ReadVFWRedV, 0>;
def : ReadAdvance<ReadVFWRedV0, 0>;
def : ReadAdvance<ReadVFWRedOV, 0>;
def : ReadAdvance<ReadVFWRedOV0, 0>;

// 16. Vector Mask Instructions
defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
defm "" : LMULReadAdvance<"ReadVIotaV", 0>;
// 17. Vector Permutation Instructions
def : ReadAdvance<ReadVMovXS, 0>;
def : ReadAdvance<ReadVMovSX_V, 0>;
def : ReadAdvance<ReadVMovSX_X, 0>;
def : ReadAdvance<ReadVMovFS, 0>;
def : ReadAdvance<ReadVMovSF_V, 0>;
def : ReadAdvance<ReadVMovSF_F, 0>;
defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVRGatherEI16VV_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// LMUL Aware
def : ReadAdvance<ReadVMov1V, 0>;
def : ReadAdvance<ReadVMov2V, 0>;
def : ReadAdvance<ReadVMov4V, 0>;
def : ReadAdvance<ReadVMov8V, 0>;

// Others
def : ReadAdvance<ReadVMask, 0>;
def : ReadAdvance<ReadVPassthru_WorstCase, 0>;
foreach mx = SchedMxList in {
  def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx), 0>;
  foreach sew = SchedSEWSet<mx>.val in
    def : ReadAdvance<!cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew), 0>;
}

// Vector Crypto Extensions
// Zvbb
defm "" : LMULReadAdvance<"ReadVBREVV", 0>;
defm "" : LMULReadAdvance<"ReadVCLZV", 0>;
defm "" : LMULReadAdvance<"ReadVCPOPV", 0>;
defm "" : LMULReadAdvance<"ReadVCTZV", 0>;
defm "" : LMULReadAdvance<"ReadVWSLLV", 0>;
defm "" : LMULReadAdvance<"ReadVWSLLX", 0>;
// Zvbc
defm "" : LMULReadAdvance<"ReadVCLMULV", 0>;
defm "" : LMULReadAdvance<"ReadVCLMULX", 0>;
// Zvkb
// VANDN uses ReadVIALU[V|X|I]
defm "" : LMULReadAdvance<"ReadVBREV8V", 0>;
defm "" : LMULReadAdvance<"ReadVREV8V", 0>;
defm "" : LMULReadAdvance<"ReadVRotV", 0>;
defm "" : LMULReadAdvance<"ReadVRotX", 0>;
// Zvkg
defm "" : LMULReadAdvance<"ReadVGHSHV", 0>;
defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
// Zvknha or Zvknhb
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
// Zvkned
defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;
defm "" : LMULReadAdvance<"ReadVAESKF2V", 0>;
defm "" : LMULReadAdvance<"ReadVAESZV", 0>;
// Zvksed
defm "" : LMULReadAdvance<"ReadVSM4KV", 0>;
defm "" : LMULReadAdvance<"ReadVSM4RV", 0>;
// Zvksh
defm "" : LMULReadAdvance<"ReadVSM3CV", 0>;
defm "" : LMULReadAdvance<"ReadVSM3MEV", 0>;

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedXsfvcp;
}