//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
24fe6060f1SDimitry Andric // 25fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 26fe6060f1SDimitry Andric 27fe6060f1SDimitry Andric #include "RISCV.h" 28fe6060f1SDimitry Andric #include "RISCVSubtarget.h" 29fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 30fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 31fe6060f1SDimitry Andric #include <queue> 32fe6060f1SDimitry Andric using namespace llvm; 33fe6060f1SDimitry Andric 34fe6060f1SDimitry Andric #define DEBUG_TYPE "riscv-insert-vsetvli" 35fe6060f1SDimitry Andric #define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass" 36fe6060f1SDimitry Andric 37fe6060f1SDimitry Andric static cl::opt<bool> DisableInsertVSETVLPHIOpt( 38fe6060f1SDimitry Andric "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden, 39fe6060f1SDimitry Andric cl::desc("Disable looking through phis when inserting vsetvlis.")); 40fe6060f1SDimitry Andric 41*81ad6265SDimitry Andric static cl::opt<bool> UseStrictAsserts( 42*81ad6265SDimitry Andric "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden, 43*81ad6265SDimitry Andric cl::desc("Enable strict assertion checking for the dataflow algorithm")); 44*81ad6265SDimitry Andric 45fe6060f1SDimitry Andric namespace { 46fe6060f1SDimitry Andric 47*81ad6265SDimitry Andric static unsigned getVLOpNum(const MachineInstr &MI) { 48*81ad6265SDimitry Andric return RISCVII::getVLOpNum(MI.getDesc()); 49fe6060f1SDimitry Andric } 50fe6060f1SDimitry Andric 51*81ad6265SDimitry Andric static unsigned getSEWOpNum(const MachineInstr &MI) { 52*81ad6265SDimitry Andric return RISCVII::getSEWOpNum(MI.getDesc()); 53fe6060f1SDimitry Andric } 54fe6060f1SDimitry Andric 5504eeddc0SDimitry Andric static bool isScalarMoveInstr(const MachineInstr &MI) { 5604eeddc0SDimitry Andric switch (MI.getOpcode()) { 5704eeddc0SDimitry Andric default: 5804eeddc0SDimitry Andric return false; 5904eeddc0SDimitry Andric case 
RISCV::PseudoVMV_S_X_M1: 6004eeddc0SDimitry Andric case RISCV::PseudoVMV_S_X_M2: 6104eeddc0SDimitry Andric case RISCV::PseudoVMV_S_X_M4: 6204eeddc0SDimitry Andric case RISCV::PseudoVMV_S_X_M8: 6304eeddc0SDimitry Andric case RISCV::PseudoVMV_S_X_MF2: 6404eeddc0SDimitry Andric case RISCV::PseudoVMV_S_X_MF4: 6504eeddc0SDimitry Andric case RISCV::PseudoVMV_S_X_MF8: 6604eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F16_M1: 6704eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F16_M2: 6804eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F16_M4: 6904eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F16_M8: 7004eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F16_MF2: 7104eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F16_MF4: 7204eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F32_M1: 7304eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F32_M2: 7404eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F32_M4: 7504eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F32_M8: 7604eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F32_MF2: 7704eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F64_M1: 7804eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F64_M2: 7904eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F64_M4: 8004eeddc0SDimitry Andric case RISCV::PseudoVFMV_S_F64_M8: 8104eeddc0SDimitry Andric return true; 8204eeddc0SDimitry Andric } 8304eeddc0SDimitry Andric } 8404eeddc0SDimitry Andric 85*81ad6265SDimitry Andric /// Get the EEW for a load or store instruction. Return None if MI is not 86*81ad6265SDimitry Andric /// a load or store which ignores SEW. 
87*81ad6265SDimitry Andric static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) { 88349cc55cSDimitry Andric switch (MI.getOpcode()) { 89349cc55cSDimitry Andric default: 90*81ad6265SDimitry Andric return None; 91349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_M1: 92349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_M1_MASK: 93349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_M2: 94349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_M2_MASK: 95349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_M4: 96349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_M4_MASK: 97349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_M8: 98349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_M8_MASK: 99349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_MF2: 100349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_MF2_MASK: 101349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_MF4: 102349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_MF4_MASK: 103349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_MF8: 104349cc55cSDimitry Andric case RISCV::PseudoVLE8_V_MF8_MASK: 105349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_M1: 106349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_M1_MASK: 107349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_M2: 108349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_M2_MASK: 109349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_M4: 110349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_M4_MASK: 111349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_M8: 112349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_M8_MASK: 113349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_MF2: 114349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_MF2_MASK: 115349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_MF4: 116349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_MF4_MASK: 117349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_MF8: 118349cc55cSDimitry Andric case RISCV::PseudoVLSE8_V_MF8_MASK: 119349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_M1: 120349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_M1_MASK: 
121349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_M2: 122349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_M2_MASK: 123349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_M4: 124349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_M4_MASK: 125349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_M8: 126349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_M8_MASK: 127349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_MF2: 128349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_MF2_MASK: 129349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_MF4: 130349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_MF4_MASK: 131349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_MF8: 132349cc55cSDimitry Andric case RISCV::PseudoVSE8_V_MF8_MASK: 133349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_M1: 134349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_M1_MASK: 135349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_M2: 136349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_M2_MASK: 137349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_M4: 138349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_M4_MASK: 139349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_M8: 140349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_M8_MASK: 141349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_MF2: 142349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_MF2_MASK: 143349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_MF4: 144349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_MF4_MASK: 145349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_MF8: 146349cc55cSDimitry Andric case RISCV::PseudoVSSE8_V_MF8_MASK: 147*81ad6265SDimitry Andric return 8; 148349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_M1: 149349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_M1_MASK: 150349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_M2: 151349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_M2_MASK: 152349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_M4: 153349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_M4_MASK: 154349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_M8: 
155349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_M8_MASK: 156349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_MF2: 157349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_MF2_MASK: 158349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_MF4: 159349cc55cSDimitry Andric case RISCV::PseudoVLE16_V_MF4_MASK: 160349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_M1: 161349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_M1_MASK: 162349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_M2: 163349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_M2_MASK: 164349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_M4: 165349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_M4_MASK: 166349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_M8: 167349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_M8_MASK: 168349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_MF2: 169349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_MF2_MASK: 170349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_MF4: 171349cc55cSDimitry Andric case RISCV::PseudoVLSE16_V_MF4_MASK: 172349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_M1: 173349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_M1_MASK: 174349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_M2: 175349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_M2_MASK: 176349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_M4: 177349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_M4_MASK: 178349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_M8: 179349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_M8_MASK: 180349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_MF2: 181349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_MF2_MASK: 182349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_MF4: 183349cc55cSDimitry Andric case RISCV::PseudoVSE16_V_MF4_MASK: 184349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_M1: 185349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_M1_MASK: 186349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_M2: 187349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_M2_MASK: 
188349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_M4: 189349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_M4_MASK: 190349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_M8: 191349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_M8_MASK: 192349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_MF2: 193349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_MF2_MASK: 194349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_MF4: 195349cc55cSDimitry Andric case RISCV::PseudoVSSE16_V_MF4_MASK: 196*81ad6265SDimitry Andric return 16; 197349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_M1: 198349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_M1_MASK: 199349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_M2: 200349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_M2_MASK: 201349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_M4: 202349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_M4_MASK: 203349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_M8: 204349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_M8_MASK: 205349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_MF2: 206349cc55cSDimitry Andric case RISCV::PseudoVLE32_V_MF2_MASK: 207349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_M1: 208349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_M1_MASK: 209349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_M2: 210349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_M2_MASK: 211349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_M4: 212349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_M4_MASK: 213349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_M8: 214349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_M8_MASK: 215349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_MF2: 216349cc55cSDimitry Andric case RISCV::PseudoVLSE32_V_MF2_MASK: 217349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_M1: 218349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_M1_MASK: 219349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_M2: 220349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_M2_MASK: 221349cc55cSDimitry Andric 
case RISCV::PseudoVSE32_V_M4: 222349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_M4_MASK: 223349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_M8: 224349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_M8_MASK: 225349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_MF2: 226349cc55cSDimitry Andric case RISCV::PseudoVSE32_V_MF2_MASK: 227349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_M1: 228349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_M1_MASK: 229349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_M2: 230349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_M2_MASK: 231349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_M4: 232349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_M4_MASK: 233349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_M8: 234349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_M8_MASK: 235349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_MF2: 236349cc55cSDimitry Andric case RISCV::PseudoVSSE32_V_MF2_MASK: 237*81ad6265SDimitry Andric return 32; 238349cc55cSDimitry Andric case RISCV::PseudoVLE64_V_M1: 239349cc55cSDimitry Andric case RISCV::PseudoVLE64_V_M1_MASK: 240349cc55cSDimitry Andric case RISCV::PseudoVLE64_V_M2: 241349cc55cSDimitry Andric case RISCV::PseudoVLE64_V_M2_MASK: 242349cc55cSDimitry Andric case RISCV::PseudoVLE64_V_M4: 243349cc55cSDimitry Andric case RISCV::PseudoVLE64_V_M4_MASK: 244349cc55cSDimitry Andric case RISCV::PseudoVLE64_V_M8: 245349cc55cSDimitry Andric case RISCV::PseudoVLE64_V_M8_MASK: 246349cc55cSDimitry Andric case RISCV::PseudoVLSE64_V_M1: 247349cc55cSDimitry Andric case RISCV::PseudoVLSE64_V_M1_MASK: 248349cc55cSDimitry Andric case RISCV::PseudoVLSE64_V_M2: 249349cc55cSDimitry Andric case RISCV::PseudoVLSE64_V_M2_MASK: 250349cc55cSDimitry Andric case RISCV::PseudoVLSE64_V_M4: 251349cc55cSDimitry Andric case RISCV::PseudoVLSE64_V_M4_MASK: 252349cc55cSDimitry Andric case RISCV::PseudoVLSE64_V_M8: 253349cc55cSDimitry Andric case RISCV::PseudoVLSE64_V_M8_MASK: 254349cc55cSDimitry Andric case RISCV::PseudoVSE64_V_M1: 
255349cc55cSDimitry Andric case RISCV::PseudoVSE64_V_M1_MASK: 256349cc55cSDimitry Andric case RISCV::PseudoVSE64_V_M2: 257349cc55cSDimitry Andric case RISCV::PseudoVSE64_V_M2_MASK: 258349cc55cSDimitry Andric case RISCV::PseudoVSE64_V_M4: 259349cc55cSDimitry Andric case RISCV::PseudoVSE64_V_M4_MASK: 260349cc55cSDimitry Andric case RISCV::PseudoVSE64_V_M8: 261349cc55cSDimitry Andric case RISCV::PseudoVSE64_V_M8_MASK: 262349cc55cSDimitry Andric case RISCV::PseudoVSSE64_V_M1: 263349cc55cSDimitry Andric case RISCV::PseudoVSSE64_V_M1_MASK: 264349cc55cSDimitry Andric case RISCV::PseudoVSSE64_V_M2: 265349cc55cSDimitry Andric case RISCV::PseudoVSSE64_V_M2_MASK: 266349cc55cSDimitry Andric case RISCV::PseudoVSSE64_V_M4: 267349cc55cSDimitry Andric case RISCV::PseudoVSSE64_V_M4_MASK: 268349cc55cSDimitry Andric case RISCV::PseudoVSSE64_V_M8: 269349cc55cSDimitry Andric case RISCV::PseudoVSSE64_V_M8_MASK: 270*81ad6265SDimitry Andric return 64; 271*81ad6265SDimitry Andric } 272349cc55cSDimitry Andric } 273349cc55cSDimitry Andric 274*81ad6265SDimitry Andric /// Return true if this is an operation on mask registers. Note that 275*81ad6265SDimitry Andric /// this includes both arithmetic/logical ops and load/store (vlm/vsm). 276*81ad6265SDimitry Andric static bool isMaskRegOp(const MachineInstr &MI) { 277*81ad6265SDimitry Andric if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) { 278*81ad6265SDimitry Andric const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); 279*81ad6265SDimitry Andric // A Log2SEW of 0 is an operation on mask registers only. 
280*81ad6265SDimitry Andric return Log2SEW == 0; 281*81ad6265SDimitry Andric } 282*81ad6265SDimitry Andric return false; 283*81ad6265SDimitry Andric } 284*81ad6265SDimitry Andric 285*81ad6265SDimitry Andric static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) { 286*81ad6265SDimitry Andric unsigned LMul; 287*81ad6265SDimitry Andric bool Fractional; 288*81ad6265SDimitry Andric std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul); 289*81ad6265SDimitry Andric 290*81ad6265SDimitry Andric // Convert LMul to a fixed point value with 3 fractional bits. 291*81ad6265SDimitry Andric LMul = Fractional ? (8 / LMul) : (LMul * 8); 292*81ad6265SDimitry Andric 293*81ad6265SDimitry Andric assert(SEW >= 8 && "Unexpected SEW value"); 294*81ad6265SDimitry Andric return (SEW * 8) / LMul; 295*81ad6265SDimitry Andric } 296*81ad6265SDimitry Andric 297*81ad6265SDimitry Andric /// Which subfields of VL or VTYPE have values we need to preserve? 298*81ad6265SDimitry Andric struct DemandedFields { 299*81ad6265SDimitry Andric bool VL = false; 300*81ad6265SDimitry Andric bool SEW = false; 301*81ad6265SDimitry Andric bool LMUL = false; 302*81ad6265SDimitry Andric bool SEWLMULRatio = false; 303*81ad6265SDimitry Andric bool TailPolicy = false; 304*81ad6265SDimitry Andric bool MaskPolicy = false; 305*81ad6265SDimitry Andric 306*81ad6265SDimitry Andric // Return true if any part of VTYPE was used 307*81ad6265SDimitry Andric bool usedVTYPE() { 308*81ad6265SDimitry Andric return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy; 309*81ad6265SDimitry Andric } 310*81ad6265SDimitry Andric 311*81ad6265SDimitry Andric // Mark all VTYPE subfields and properties as demanded 312*81ad6265SDimitry Andric void demandVTYPE() { 313*81ad6265SDimitry Andric SEW = true; 314*81ad6265SDimitry Andric LMUL = true; 315*81ad6265SDimitry Andric SEWLMULRatio = true; 316*81ad6265SDimitry Andric TailPolicy = true; 317*81ad6265SDimitry Andric MaskPolicy = true; 318*81ad6265SDimitry Andric } 
319*81ad6265SDimitry Andric }; 320*81ad6265SDimitry Andric 321*81ad6265SDimitry Andric /// Return true if the two values of the VTYPE register provided are 322*81ad6265SDimitry Andric /// indistinguishable from the perspective of an instruction (or set of 323*81ad6265SDimitry Andric /// instructions) which use only the Used subfields and properties. 324*81ad6265SDimitry Andric static bool areCompatibleVTYPEs(uint64_t VType1, 325*81ad6265SDimitry Andric uint64_t VType2, 326*81ad6265SDimitry Andric const DemandedFields &Used) { 327*81ad6265SDimitry Andric if (Used.SEW && 328*81ad6265SDimitry Andric RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2)) 329*81ad6265SDimitry Andric return false; 330*81ad6265SDimitry Andric 331*81ad6265SDimitry Andric if (Used.LMUL && 332*81ad6265SDimitry Andric RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2)) 333*81ad6265SDimitry Andric return false; 334*81ad6265SDimitry Andric 335*81ad6265SDimitry Andric if (Used.SEWLMULRatio) { 336*81ad6265SDimitry Andric auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1), 337*81ad6265SDimitry Andric RISCVVType::getVLMUL(VType1)); 338*81ad6265SDimitry Andric auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2), 339*81ad6265SDimitry Andric RISCVVType::getVLMUL(VType2)); 340*81ad6265SDimitry Andric if (Ratio1 != Ratio2) 341*81ad6265SDimitry Andric return false; 342*81ad6265SDimitry Andric } 343*81ad6265SDimitry Andric 344*81ad6265SDimitry Andric if (Used.TailPolicy && 345*81ad6265SDimitry Andric RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2)) 346*81ad6265SDimitry Andric return false; 347*81ad6265SDimitry Andric if (Used.MaskPolicy && 348*81ad6265SDimitry Andric RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2)) 349*81ad6265SDimitry Andric return false; 350*81ad6265SDimitry Andric return true; 351*81ad6265SDimitry Andric } 352*81ad6265SDimitry Andric 353*81ad6265SDimitry Andric /// Return the fields and properties demanded by the 
provided instruction. 354*81ad6265SDimitry Andric static DemandedFields getDemanded(const MachineInstr &MI) { 355*81ad6265SDimitry Andric // Warning: This function has to work on both the lowered (i.e. post 356*81ad6265SDimitry Andric // emitVSETVLIs) and pre-lowering forms. The main implication of this is 357*81ad6265SDimitry Andric // that it can't use the value of a SEW, VL, or Policy operand as they might 358*81ad6265SDimitry Andric // be stale after lowering. 359*81ad6265SDimitry Andric 360*81ad6265SDimitry Andric // Most instructions don't use any of these subfeilds. 361*81ad6265SDimitry Andric DemandedFields Res; 362*81ad6265SDimitry Andric // Start conservative if registers are used 363*81ad6265SDimitry Andric if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL)) 364*81ad6265SDimitry Andric Res.VL = true; 365*81ad6265SDimitry Andric if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE)) 366*81ad6265SDimitry Andric Res.demandVTYPE(); 367*81ad6265SDimitry Andric // Start conservative on the unlowered form too 368*81ad6265SDimitry Andric uint64_t TSFlags = MI.getDesc().TSFlags; 369*81ad6265SDimitry Andric if (RISCVII::hasSEWOp(TSFlags)) { 370*81ad6265SDimitry Andric Res.demandVTYPE(); 371*81ad6265SDimitry Andric if (RISCVII::hasVLOp(TSFlags)) 372*81ad6265SDimitry Andric Res.VL = true; 373*81ad6265SDimitry Andric } 374*81ad6265SDimitry Andric 375*81ad6265SDimitry Andric // Loads and stores with implicit EEW do not demand SEW or LMUL directly. 376*81ad6265SDimitry Andric // They instead demand the ratio of the two which is used in computing 377*81ad6265SDimitry Andric // EMUL, but which allows us the flexibility to change SEW and LMUL 378*81ad6265SDimitry Andric // provided we don't change the ratio. 379*81ad6265SDimitry Andric // Note: We assume that the instructions initial SEW is the EEW encoded 380*81ad6265SDimitry Andric // in the opcode. This is asserted when constructing the VSETVLIInfo. 
381*81ad6265SDimitry Andric if (getEEWForLoadStore(MI)) { 382*81ad6265SDimitry Andric Res.SEW = false; 383*81ad6265SDimitry Andric Res.LMUL = false; 384*81ad6265SDimitry Andric } 385*81ad6265SDimitry Andric 386*81ad6265SDimitry Andric // Store instructions don't use the policy fields. 387*81ad6265SDimitry Andric if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) { 388*81ad6265SDimitry Andric Res.TailPolicy = false; 389*81ad6265SDimitry Andric Res.MaskPolicy = false; 390*81ad6265SDimitry Andric } 391*81ad6265SDimitry Andric 392*81ad6265SDimitry Andric // If this is a mask reg operation, it only cares about VLMAX. 393*81ad6265SDimitry Andric // TODO: Possible extensions to this logic 394*81ad6265SDimitry Andric // * Probably ok if available VLMax is larger than demanded 395*81ad6265SDimitry Andric // * The policy bits can probably be ignored.. 396*81ad6265SDimitry Andric if (isMaskRegOp(MI)) { 397*81ad6265SDimitry Andric Res.SEW = false; 398*81ad6265SDimitry Andric Res.LMUL = false; 399*81ad6265SDimitry Andric } 400*81ad6265SDimitry Andric 401*81ad6265SDimitry Andric return Res; 402*81ad6265SDimitry Andric } 403*81ad6265SDimitry Andric 404*81ad6265SDimitry Andric /// Defines the abstract state with which the forward dataflow models the 405*81ad6265SDimitry Andric /// values of the VL and VTYPE registers after insertion. 406*81ad6265SDimitry Andric class VSETVLIInfo { 407*81ad6265SDimitry Andric union { 408*81ad6265SDimitry Andric Register AVLReg; 409*81ad6265SDimitry Andric unsigned AVLImm; 410*81ad6265SDimitry Andric }; 411*81ad6265SDimitry Andric 412*81ad6265SDimitry Andric enum : uint8_t { 413*81ad6265SDimitry Andric Uninitialized, 414*81ad6265SDimitry Andric AVLIsReg, 415*81ad6265SDimitry Andric AVLIsImm, 416*81ad6265SDimitry Andric Unknown, 417*81ad6265SDimitry Andric } State = Uninitialized; 418*81ad6265SDimitry Andric 419*81ad6265SDimitry Andric // Fields from VTYPE. 
420*81ad6265SDimitry Andric RISCVII::VLMUL VLMul = RISCVII::LMUL_1; 421*81ad6265SDimitry Andric uint8_t SEW = 0; 422*81ad6265SDimitry Andric uint8_t TailAgnostic : 1; 423*81ad6265SDimitry Andric uint8_t MaskAgnostic : 1; 424*81ad6265SDimitry Andric uint8_t SEWLMULRatioOnly : 1; 425*81ad6265SDimitry Andric 426*81ad6265SDimitry Andric public: 427*81ad6265SDimitry Andric VSETVLIInfo() 428*81ad6265SDimitry Andric : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), 429*81ad6265SDimitry Andric SEWLMULRatioOnly(false) {} 430*81ad6265SDimitry Andric 431*81ad6265SDimitry Andric static VSETVLIInfo getUnknown() { 432*81ad6265SDimitry Andric VSETVLIInfo Info; 433*81ad6265SDimitry Andric Info.setUnknown(); 434*81ad6265SDimitry Andric return Info; 435*81ad6265SDimitry Andric } 436*81ad6265SDimitry Andric 437*81ad6265SDimitry Andric bool isValid() const { return State != Uninitialized; } 438*81ad6265SDimitry Andric void setUnknown() { State = Unknown; } 439*81ad6265SDimitry Andric bool isUnknown() const { return State == Unknown; } 440*81ad6265SDimitry Andric 441*81ad6265SDimitry Andric void setAVLReg(Register Reg) { 442*81ad6265SDimitry Andric AVLReg = Reg; 443*81ad6265SDimitry Andric State = AVLIsReg; 444*81ad6265SDimitry Andric } 445*81ad6265SDimitry Andric 446*81ad6265SDimitry Andric void setAVLImm(unsigned Imm) { 447*81ad6265SDimitry Andric AVLImm = Imm; 448*81ad6265SDimitry Andric State = AVLIsImm; 449*81ad6265SDimitry Andric } 450*81ad6265SDimitry Andric 451*81ad6265SDimitry Andric bool hasAVLImm() const { return State == AVLIsImm; } 452*81ad6265SDimitry Andric bool hasAVLReg() const { return State == AVLIsReg; } 453*81ad6265SDimitry Andric Register getAVLReg() const { 454*81ad6265SDimitry Andric assert(hasAVLReg()); 455*81ad6265SDimitry Andric return AVLReg; 456*81ad6265SDimitry Andric } 457*81ad6265SDimitry Andric unsigned getAVLImm() const { 458*81ad6265SDimitry Andric assert(hasAVLImm()); 459*81ad6265SDimitry Andric return AVLImm; 460*81ad6265SDimitry Andric } 
461*81ad6265SDimitry Andric 462*81ad6265SDimitry Andric unsigned getSEW() const { return SEW; } 463*81ad6265SDimitry Andric RISCVII::VLMUL getVLMUL() const { return VLMul; } 464*81ad6265SDimitry Andric 465*81ad6265SDimitry Andric bool hasNonZeroAVL() const { 466*81ad6265SDimitry Andric if (hasAVLImm()) 467*81ad6265SDimitry Andric return getAVLImm() > 0; 468*81ad6265SDimitry Andric if (hasAVLReg()) 469*81ad6265SDimitry Andric return getAVLReg() == RISCV::X0; 470*81ad6265SDimitry Andric return false; 471*81ad6265SDimitry Andric } 472*81ad6265SDimitry Andric 473*81ad6265SDimitry Andric bool hasSameAVL(const VSETVLIInfo &Other) const { 474*81ad6265SDimitry Andric assert(isValid() && Other.isValid() && 475*81ad6265SDimitry Andric "Can't compare invalid VSETVLIInfos"); 476*81ad6265SDimitry Andric assert(!isUnknown() && !Other.isUnknown() && 477*81ad6265SDimitry Andric "Can't compare AVL in unknown state"); 478*81ad6265SDimitry Andric if (hasAVLReg() && Other.hasAVLReg()) 479*81ad6265SDimitry Andric return getAVLReg() == Other.getAVLReg(); 480*81ad6265SDimitry Andric 481*81ad6265SDimitry Andric if (hasAVLImm() && Other.hasAVLImm()) 482*81ad6265SDimitry Andric return getAVLImm() == Other.getAVLImm(); 483*81ad6265SDimitry Andric 484*81ad6265SDimitry Andric return false; 485*81ad6265SDimitry Andric } 486*81ad6265SDimitry Andric 487*81ad6265SDimitry Andric void setVTYPE(unsigned VType) { 488*81ad6265SDimitry Andric assert(isValid() && !isUnknown() && 489*81ad6265SDimitry Andric "Can't set VTYPE for uninitialized or unknown"); 490*81ad6265SDimitry Andric VLMul = RISCVVType::getVLMUL(VType); 491*81ad6265SDimitry Andric SEW = RISCVVType::getSEW(VType); 492*81ad6265SDimitry Andric TailAgnostic = RISCVVType::isTailAgnostic(VType); 493*81ad6265SDimitry Andric MaskAgnostic = RISCVVType::isMaskAgnostic(VType); 494*81ad6265SDimitry Andric } 495*81ad6265SDimitry Andric void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) { 496*81ad6265SDimitry Andric assert(isValid() && 
!isUnknown() && 497*81ad6265SDimitry Andric "Can't set VTYPE for uninitialized or unknown"); 498*81ad6265SDimitry Andric VLMul = L; 499*81ad6265SDimitry Andric SEW = S; 500*81ad6265SDimitry Andric TailAgnostic = TA; 501*81ad6265SDimitry Andric MaskAgnostic = MA; 502*81ad6265SDimitry Andric } 503*81ad6265SDimitry Andric 504*81ad6265SDimitry Andric unsigned encodeVTYPE() const { 505*81ad6265SDimitry Andric assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && 506*81ad6265SDimitry Andric "Can't encode VTYPE for uninitialized or unknown"); 507*81ad6265SDimitry Andric return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); 508*81ad6265SDimitry Andric } 509*81ad6265SDimitry Andric 510*81ad6265SDimitry Andric bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } 511*81ad6265SDimitry Andric 512*81ad6265SDimitry Andric bool hasSameSEW(const VSETVLIInfo &Other) const { 513*81ad6265SDimitry Andric assert(isValid() && Other.isValid() && 514*81ad6265SDimitry Andric "Can't compare invalid VSETVLIInfos"); 515*81ad6265SDimitry Andric assert(!isUnknown() && !Other.isUnknown() && 516*81ad6265SDimitry Andric "Can't compare VTYPE in unknown state"); 517*81ad6265SDimitry Andric assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && 518*81ad6265SDimitry Andric "Can't compare when only LMUL/SEW ratio is valid."); 519*81ad6265SDimitry Andric return SEW == Other.SEW; 520*81ad6265SDimitry Andric } 521*81ad6265SDimitry Andric 522*81ad6265SDimitry Andric bool hasSameVTYPE(const VSETVLIInfo &Other) const { 523*81ad6265SDimitry Andric assert(isValid() && Other.isValid() && 524*81ad6265SDimitry Andric "Can't compare invalid VSETVLIInfos"); 525*81ad6265SDimitry Andric assert(!isUnknown() && !Other.isUnknown() && 526*81ad6265SDimitry Andric "Can't compare VTYPE in unknown state"); 527*81ad6265SDimitry Andric assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && 528*81ad6265SDimitry Andric "Can't compare when only LMUL/SEW ratio is valid."); 529*81ad6265SDimitry Andric 
return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) == 530*81ad6265SDimitry Andric std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic, 531*81ad6265SDimitry Andric Other.MaskAgnostic); 532*81ad6265SDimitry Andric } 533*81ad6265SDimitry Andric 534*81ad6265SDimitry Andric unsigned getSEWLMULRatio() const { 535*81ad6265SDimitry Andric assert(isValid() && !isUnknown() && 536*81ad6265SDimitry Andric "Can't use VTYPE for uninitialized or unknown"); 537*81ad6265SDimitry Andric return ::getSEWLMULRatio(SEW, VLMul); 538*81ad6265SDimitry Andric } 539*81ad6265SDimitry Andric 540*81ad6265SDimitry Andric // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX. 541*81ad6265SDimitry Andric // Note that having the same VLMAX ensures that both share the same 542*81ad6265SDimitry Andric // function from AVL to VL; that is, they must produce the same VL value 543*81ad6265SDimitry Andric // for any given AVL value. 544*81ad6265SDimitry Andric bool hasSameVLMAX(const VSETVLIInfo &Other) const { 545*81ad6265SDimitry Andric assert(isValid() && Other.isValid() && 546*81ad6265SDimitry Andric "Can't compare invalid VSETVLIInfos"); 547*81ad6265SDimitry Andric assert(!isUnknown() && !Other.isUnknown() && 548*81ad6265SDimitry Andric "Can't compare VTYPE in unknown state"); 549*81ad6265SDimitry Andric return getSEWLMULRatio() == Other.getSEWLMULRatio(); 550*81ad6265SDimitry Andric } 551*81ad6265SDimitry Andric 552*81ad6265SDimitry Andric bool hasSamePolicy(const VSETVLIInfo &Other) const { 553*81ad6265SDimitry Andric assert(isValid() && Other.isValid() && 554*81ad6265SDimitry Andric "Can't compare invalid VSETVLIInfos"); 555*81ad6265SDimitry Andric assert(!isUnknown() && !Other.isUnknown() && 556*81ad6265SDimitry Andric "Can't compare VTYPE in unknown state"); 557*81ad6265SDimitry Andric return TailAgnostic == Other.TailAgnostic && 558*81ad6265SDimitry Andric MaskAgnostic == Other.MaskAgnostic; 559*81ad6265SDimitry Andric } 560*81ad6265SDimitry Andric 561*81ad6265SDimitry 
Andric bool hasCompatibleVTYPE(const MachineInstr &MI, 562*81ad6265SDimitry Andric const VSETVLIInfo &Require) const { 563*81ad6265SDimitry Andric const DemandedFields Used = getDemanded(MI); 564*81ad6265SDimitry Andric return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used); 565*81ad6265SDimitry Andric } 566*81ad6265SDimitry Andric 567*81ad6265SDimitry Andric // Determine whether the vector instructions requirements represented by 568*81ad6265SDimitry Andric // Require are compatible with the previous vsetvli instruction represented 569*81ad6265SDimitry Andric // by this. MI is the instruction whose requirements we're considering. 570*81ad6265SDimitry Andric bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const { 571*81ad6265SDimitry Andric assert(isValid() && Require.isValid() && 572*81ad6265SDimitry Andric "Can't compare invalid VSETVLIInfos"); 573*81ad6265SDimitry Andric assert(!Require.SEWLMULRatioOnly && 574*81ad6265SDimitry Andric "Expected a valid VTYPE for instruction!"); 575*81ad6265SDimitry Andric // Nothing is compatible with Unknown. 576*81ad6265SDimitry Andric if (isUnknown() || Require.isUnknown()) 577*81ad6265SDimitry Andric return false; 578*81ad6265SDimitry Andric 579*81ad6265SDimitry Andric // If only our VLMAX ratio is valid, then this isn't compatible. 580*81ad6265SDimitry Andric if (SEWLMULRatioOnly) 581*81ad6265SDimitry Andric return false; 582*81ad6265SDimitry Andric 583*81ad6265SDimitry Andric // If the instruction doesn't need an AVLReg and the SEW matches, consider 584*81ad6265SDimitry Andric // it compatible. 
585*81ad6265SDimitry Andric if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister) 586*81ad6265SDimitry Andric if (SEW == Require.SEW) 587*81ad6265SDimitry Andric return true; 588*81ad6265SDimitry Andric 589*81ad6265SDimitry Andric return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require); 590*81ad6265SDimitry Andric } 591*81ad6265SDimitry Andric 592*81ad6265SDimitry Andric bool operator==(const VSETVLIInfo &Other) const { 593*81ad6265SDimitry Andric // Uninitialized is only equal to another Uninitialized. 594*81ad6265SDimitry Andric if (!isValid()) 595*81ad6265SDimitry Andric return !Other.isValid(); 596*81ad6265SDimitry Andric if (!Other.isValid()) 597*81ad6265SDimitry Andric return !isValid(); 598*81ad6265SDimitry Andric 599*81ad6265SDimitry Andric // Unknown is only equal to another Unknown. 600*81ad6265SDimitry Andric if (isUnknown()) 601*81ad6265SDimitry Andric return Other.isUnknown(); 602*81ad6265SDimitry Andric if (Other.isUnknown()) 603*81ad6265SDimitry Andric return isUnknown(); 604*81ad6265SDimitry Andric 605*81ad6265SDimitry Andric if (!hasSameAVL(Other)) 606*81ad6265SDimitry Andric return false; 607*81ad6265SDimitry Andric 608*81ad6265SDimitry Andric // If the SEWLMULRatioOnly bits are different, then they aren't equal. 609*81ad6265SDimitry Andric if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly) 610*81ad6265SDimitry Andric return false; 611*81ad6265SDimitry Andric 612*81ad6265SDimitry Andric // If only the VLMAX is valid, check that it is the same. 613*81ad6265SDimitry Andric if (SEWLMULRatioOnly) 614*81ad6265SDimitry Andric return hasSameVLMAX(Other); 615*81ad6265SDimitry Andric 616*81ad6265SDimitry Andric // If the full VTYPE is valid, check that it is the same. 
617*81ad6265SDimitry Andric return hasSameVTYPE(Other); 618*81ad6265SDimitry Andric } 619*81ad6265SDimitry Andric 620*81ad6265SDimitry Andric bool operator!=(const VSETVLIInfo &Other) const { 621*81ad6265SDimitry Andric return !(*this == Other); 622*81ad6265SDimitry Andric } 623*81ad6265SDimitry Andric 624*81ad6265SDimitry Andric // Calculate the VSETVLIInfo visible to a block assuming this and Other are 625*81ad6265SDimitry Andric // both predecessors. 626*81ad6265SDimitry Andric VSETVLIInfo intersect(const VSETVLIInfo &Other) const { 627*81ad6265SDimitry Andric // If the new value isn't valid, ignore it. 628*81ad6265SDimitry Andric if (!Other.isValid()) 629*81ad6265SDimitry Andric return *this; 630*81ad6265SDimitry Andric 631*81ad6265SDimitry Andric // If this value isn't valid, this must be the first predecessor, use it. 632*81ad6265SDimitry Andric if (!isValid()) 633*81ad6265SDimitry Andric return Other; 634*81ad6265SDimitry Andric 635*81ad6265SDimitry Andric // If either is unknown, the result is unknown. 636*81ad6265SDimitry Andric if (isUnknown() || Other.isUnknown()) 637*81ad6265SDimitry Andric return VSETVLIInfo::getUnknown(); 638*81ad6265SDimitry Andric 639*81ad6265SDimitry Andric // If we have an exact, match return this. 640*81ad6265SDimitry Andric if (*this == Other) 641*81ad6265SDimitry Andric return *this; 642*81ad6265SDimitry Andric 643*81ad6265SDimitry Andric // Not an exact match, but maybe the AVL and VLMAX are the same. If so, 644*81ad6265SDimitry Andric // return an SEW/LMUL ratio only value. 645*81ad6265SDimitry Andric if (hasSameAVL(Other) && hasSameVLMAX(Other)) { 646*81ad6265SDimitry Andric VSETVLIInfo MergeInfo = *this; 647*81ad6265SDimitry Andric MergeInfo.SEWLMULRatioOnly = true; 648*81ad6265SDimitry Andric return MergeInfo; 649*81ad6265SDimitry Andric } 650*81ad6265SDimitry Andric 651*81ad6265SDimitry Andric // Otherwise the result is unknown. 
652*81ad6265SDimitry Andric return VSETVLIInfo::getUnknown(); 653*81ad6265SDimitry Andric } 654*81ad6265SDimitry Andric 655*81ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 656*81ad6265SDimitry Andric /// Support for debugging, callable in GDB: V->dump() 657*81ad6265SDimitry Andric LLVM_DUMP_METHOD void dump() const { 658*81ad6265SDimitry Andric print(dbgs()); 659*81ad6265SDimitry Andric dbgs() << "\n"; 660*81ad6265SDimitry Andric } 661*81ad6265SDimitry Andric 662*81ad6265SDimitry Andric /// Implement operator<<. 663*81ad6265SDimitry Andric /// @{ 664*81ad6265SDimitry Andric void print(raw_ostream &OS) const { 665*81ad6265SDimitry Andric OS << "{"; 666*81ad6265SDimitry Andric if (!isValid()) 667*81ad6265SDimitry Andric OS << "Uninitialized"; 668*81ad6265SDimitry Andric if (isUnknown()) 669*81ad6265SDimitry Andric OS << "unknown"; 670*81ad6265SDimitry Andric if (hasAVLReg()) 671*81ad6265SDimitry Andric OS << "AVLReg=" << (unsigned)AVLReg; 672*81ad6265SDimitry Andric if (hasAVLImm()) 673*81ad6265SDimitry Andric OS << "AVLImm=" << (unsigned)AVLImm; 674*81ad6265SDimitry Andric OS << ", " 675*81ad6265SDimitry Andric << "VLMul=" << (unsigned)VLMul << ", " 676*81ad6265SDimitry Andric << "SEW=" << (unsigned)SEW << ", " 677*81ad6265SDimitry Andric << "TailAgnostic=" << (bool)TailAgnostic << ", " 678*81ad6265SDimitry Andric << "MaskAgnostic=" << (bool)MaskAgnostic << ", " 679*81ad6265SDimitry Andric << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; 680*81ad6265SDimitry Andric } 681*81ad6265SDimitry Andric #endif 682*81ad6265SDimitry Andric }; 683*81ad6265SDimitry Andric 684*81ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 685*81ad6265SDimitry Andric LLVM_ATTRIBUTE_USED 686*81ad6265SDimitry Andric inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) { 687*81ad6265SDimitry Andric V.print(OS); 688*81ad6265SDimitry Andric return OS; 689*81ad6265SDimitry Andric } 690*81ad6265SDimitry Andric #endif 

// Per-basic-block dataflow state for the three phases described in the file
// header comment.
struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  // Indexed by MachineBasicBlock number.
  std::vector<BlockData> BlockInfo;
  // Worklist of blocks whose exit state changed and whose successors must be
  // revisited (Phase 2 fixed-point iteration).
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // This pass only inserts/rewrites instructions within blocks; it never
    // changes the CFG.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  // Return true if a VSETVLI must be emitted to transition from CurInfo to
  // the state Require demanded by MI.
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  // Return true if a VSETVLI is still needed when the AVL comes through a phi
  // of VL outputs of predecessor VSETVLIs (see comment above the definition).
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  // Emit a vsetvli/vsetivli before MI (or at InsertPt) establishing Info,
  // using PrevInfo to pick cheaper VL-preserving forms when possible.
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  // Abstract transfer functions: update Info for the state just before /
  // just after executing MI.
  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  // Phase 1: compute BlockData::Change; returns whether the block contained
  // any vector operation.
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  // Phase 2: recompute Pred/Exit for MBB from predecessor exits.
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  // Phase 3: insert the actual vsetvli instructions in MBB.
  void emitVSETVLIs(MachineBasicBlock &MBB);
  // Post-pass cleanups within a block; doPRE presumably hoists vsetvlis out
  // of blocks (partial redundancy elimination) — bodies not visible here.
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  // Expand the vl outputs of fault-only-first loads to PseudoReadVL (see file
  // header comment).
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, 759*81ad6265SDimitry Andric false, false) 760*81ad6265SDimitry Andric 761*81ad6265SDimitry Andric static bool isVectorConfigInstr(const MachineInstr &MI) { 762*81ad6265SDimitry Andric return MI.getOpcode() == RISCV::PseudoVSETVLI || 763*81ad6265SDimitry Andric MI.getOpcode() == RISCV::PseudoVSETVLIX0 || 764*81ad6265SDimitry Andric MI.getOpcode() == RISCV::PseudoVSETIVLI; 765*81ad6265SDimitry Andric } 766*81ad6265SDimitry Andric 767*81ad6265SDimitry Andric /// Return true if this is 'vsetvli x0, x0, vtype' which preserves 768*81ad6265SDimitry Andric /// VL and only sets VTYPE. 769*81ad6265SDimitry Andric static bool isVLPreservingConfig(const MachineInstr &MI) { 770*81ad6265SDimitry Andric if (MI.getOpcode() != RISCV::PseudoVSETVLIX0) 771*81ad6265SDimitry Andric return false; 772*81ad6265SDimitry Andric assert(RISCV::X0 == MI.getOperand(1).getReg()); 773*81ad6265SDimitry Andric return RISCV::X0 == MI.getOperand(0).getReg(); 774*81ad6265SDimitry Andric } 775*81ad6265SDimitry Andric 776*81ad6265SDimitry Andric static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags, 777*81ad6265SDimitry Andric const MachineRegisterInfo *MRI) { 778*81ad6265SDimitry Andric VSETVLIInfo InstrInfo; 779*81ad6265SDimitry Andric 780*81ad6265SDimitry Andric // If the instruction has policy argument, use the argument. 781*81ad6265SDimitry Andric // If there is no policy argument, default to tail agnostic unless the 782*81ad6265SDimitry Andric // destination is tied to a source. Unless the source is undef. In that case 783*81ad6265SDimitry Andric // the user would have some control over the policy values. 
784*81ad6265SDimitry Andric bool TailAgnostic = true; 785*81ad6265SDimitry Andric bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags); 786*81ad6265SDimitry Andric // FIXME: Could we look at the above or below instructions to choose the 787*81ad6265SDimitry Andric // matched mask policy to reduce vsetvli instructions? Default mask policy is 788*81ad6265SDimitry Andric // agnostic if instructions use mask policy, otherwise is undisturbed. Because 789*81ad6265SDimitry Andric // most mask operations are mask undisturbed, so we could possibly reduce the 790*81ad6265SDimitry Andric // vsetvli between mask and nomasked instruction sequence. 791*81ad6265SDimitry Andric bool MaskAgnostic = UsesMaskPolicy; 792*81ad6265SDimitry Andric unsigned UseOpIdx; 793*81ad6265SDimitry Andric if (RISCVII::hasVecPolicyOp(TSFlags)) { 794*81ad6265SDimitry Andric const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1); 795*81ad6265SDimitry Andric uint64_t Policy = Op.getImm(); 796*81ad6265SDimitry Andric assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && 797*81ad6265SDimitry Andric "Invalid Policy Value"); 798*81ad6265SDimitry Andric // Although in some cases, mismatched passthru/maskedoff with policy value 799*81ad6265SDimitry Andric // does not make sense (ex. tied operand is IMPLICIT_DEF with non-TAMA 800*81ad6265SDimitry Andric // policy, or tied operand is not IMPLICIT_DEF with TAMA policy), but users 801*81ad6265SDimitry Andric // have set the policy value explicitly, so compiler would not fix it. 
802*81ad6265SDimitry Andric TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC; 803*81ad6265SDimitry Andric MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC; 804*81ad6265SDimitry Andric } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 805*81ad6265SDimitry Andric TailAgnostic = false; 806*81ad6265SDimitry Andric if (UsesMaskPolicy) 807*81ad6265SDimitry Andric MaskAgnostic = false; 808*81ad6265SDimitry Andric // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 809*81ad6265SDimitry Andric const MachineOperand &UseMO = MI.getOperand(UseOpIdx); 810*81ad6265SDimitry Andric MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg()); 811*81ad6265SDimitry Andric if (UseMI && UseMI->isImplicitDef()) { 812*81ad6265SDimitry Andric TailAgnostic = true; 813*81ad6265SDimitry Andric if (UsesMaskPolicy) 814*81ad6265SDimitry Andric MaskAgnostic = true; 815*81ad6265SDimitry Andric } 816*81ad6265SDimitry Andric // Some pseudo instructions force a tail agnostic policy despite having a 817*81ad6265SDimitry Andric // tied def. 818*81ad6265SDimitry Andric if (RISCVII::doesForceTailAgnostic(TSFlags)) 819*81ad6265SDimitry Andric TailAgnostic = true; 820*81ad6265SDimitry Andric } 821*81ad6265SDimitry Andric 822*81ad6265SDimitry Andric RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); 823*81ad6265SDimitry Andric 824*81ad6265SDimitry Andric unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); 825*81ad6265SDimitry Andric // A Log2SEW of 0 is an operation on mask registers only. 826*81ad6265SDimitry Andric unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; 827*81ad6265SDimitry Andric assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 828*81ad6265SDimitry Andric 829*81ad6265SDimitry Andric if (RISCVII::hasVLOp(TSFlags)) { 830*81ad6265SDimitry Andric const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); 831*81ad6265SDimitry Andric if (VLOp.isImm()) { 832*81ad6265SDimitry Andric int64_t Imm = VLOp.getImm(); 833*81ad6265SDimitry Andric // Conver the VLMax sentintel to X0 register. 834*81ad6265SDimitry Andric if (Imm == RISCV::VLMaxSentinel) 835*81ad6265SDimitry Andric InstrInfo.setAVLReg(RISCV::X0); 836*81ad6265SDimitry Andric else 837*81ad6265SDimitry Andric InstrInfo.setAVLImm(Imm); 838*81ad6265SDimitry Andric } else { 839*81ad6265SDimitry Andric InstrInfo.setAVLReg(VLOp.getReg()); 840*81ad6265SDimitry Andric } 841*81ad6265SDimitry Andric } else { 842*81ad6265SDimitry Andric InstrInfo.setAVLReg(RISCV::NoRegister); 843*81ad6265SDimitry Andric } 844*81ad6265SDimitry Andric #ifndef NDEBUG 845*81ad6265SDimitry Andric if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) { 846*81ad6265SDimitry Andric assert(SEW == EEW && "Initial SEW doesn't match expected EEW"); 847*81ad6265SDimitry Andric } 848*81ad6265SDimitry Andric #endif 849*81ad6265SDimitry Andric InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); 850*81ad6265SDimitry Andric 851*81ad6265SDimitry Andric return InstrInfo; 852*81ad6265SDimitry Andric } 853*81ad6265SDimitry Andric 854*81ad6265SDimitry Andric void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, 855*81ad6265SDimitry Andric const VSETVLIInfo &Info, 856*81ad6265SDimitry Andric const VSETVLIInfo &PrevInfo) { 857*81ad6265SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 858*81ad6265SDimitry Andric insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo); 859*81ad6265SDimitry Andric } 860*81ad6265SDimitry Andric 861*81ad6265SDimitry Andric void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, 862*81ad6265SDimitry Andric 
MachineBasicBlock::iterator InsertPt, DebugLoc DL, 863*81ad6265SDimitry Andric const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { 864*81ad6265SDimitry Andric 865*81ad6265SDimitry Andric // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same 866*81ad6265SDimitry Andric // VLMAX. 867*81ad6265SDimitry Andric if (PrevInfo.isValid() && !PrevInfo.isUnknown() && 868*81ad6265SDimitry Andric Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) { 869*81ad6265SDimitry Andric BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) 870*81ad6265SDimitry Andric .addReg(RISCV::X0, RegState::Define | RegState::Dead) 871*81ad6265SDimitry Andric .addReg(RISCV::X0, RegState::Kill) 872*81ad6265SDimitry Andric .addImm(Info.encodeVTYPE()) 873*81ad6265SDimitry Andric .addReg(RISCV::VL, RegState::Implicit); 874*81ad6265SDimitry Andric return; 875*81ad6265SDimitry Andric } 876*81ad6265SDimitry Andric 877*81ad6265SDimitry Andric if (Info.hasAVLImm()) { 878*81ad6265SDimitry Andric BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) 879*81ad6265SDimitry Andric .addReg(RISCV::X0, RegState::Define | RegState::Dead) 880*81ad6265SDimitry Andric .addImm(Info.getAVLImm()) 881*81ad6265SDimitry Andric .addImm(Info.encodeVTYPE()); 882*81ad6265SDimitry Andric return; 883*81ad6265SDimitry Andric } 884*81ad6265SDimitry Andric 885*81ad6265SDimitry Andric Register AVLReg = Info.getAVLReg(); 886*81ad6265SDimitry Andric if (AVLReg == RISCV::NoRegister) { 887*81ad6265SDimitry Andric // We can only use x0, x0 if there's no chance of the vtype change causing 888*81ad6265SDimitry Andric // the previous vl to become invalid. 
889*81ad6265SDimitry Andric if (PrevInfo.isValid() && !PrevInfo.isUnknown() && 890*81ad6265SDimitry Andric Info.hasSameVLMAX(PrevInfo)) { 891*81ad6265SDimitry Andric BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) 892*81ad6265SDimitry Andric .addReg(RISCV::X0, RegState::Define | RegState::Dead) 893*81ad6265SDimitry Andric .addReg(RISCV::X0, RegState::Kill) 894*81ad6265SDimitry Andric .addImm(Info.encodeVTYPE()) 895*81ad6265SDimitry Andric .addReg(RISCV::VL, RegState::Implicit); 896*81ad6265SDimitry Andric return; 897*81ad6265SDimitry Andric } 898*81ad6265SDimitry Andric // Otherwise use an AVL of 0 to avoid depending on previous vl. 899*81ad6265SDimitry Andric BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) 900*81ad6265SDimitry Andric .addReg(RISCV::X0, RegState::Define | RegState::Dead) 901*81ad6265SDimitry Andric .addImm(0) 902*81ad6265SDimitry Andric .addImm(Info.encodeVTYPE()); 903*81ad6265SDimitry Andric return; 904*81ad6265SDimitry Andric } 905*81ad6265SDimitry Andric 906*81ad6265SDimitry Andric if (AVLReg.isVirtual()) 907*81ad6265SDimitry Andric MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass); 908*81ad6265SDimitry Andric 909*81ad6265SDimitry Andric // Use X0 as the DestReg unless AVLReg is X0. We also need to change the 910*81ad6265SDimitry Andric // opcode if the AVLReg is X0 as they have different register classes for 911*81ad6265SDimitry Andric // the AVL operand. 
912*81ad6265SDimitry Andric Register DestReg = RISCV::X0; 913*81ad6265SDimitry Andric unsigned Opcode = RISCV::PseudoVSETVLI; 914*81ad6265SDimitry Andric if (AVLReg == RISCV::X0) { 915*81ad6265SDimitry Andric DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); 916*81ad6265SDimitry Andric Opcode = RISCV::PseudoVSETVLIX0; 917*81ad6265SDimitry Andric } 918*81ad6265SDimitry Andric BuildMI(MBB, InsertPt, DL, TII->get(Opcode)) 919*81ad6265SDimitry Andric .addReg(DestReg, RegState::Define | RegState::Dead) 920*81ad6265SDimitry Andric .addReg(AVLReg) 921*81ad6265SDimitry Andric .addImm(Info.encodeVTYPE()); 922*81ad6265SDimitry Andric } 923*81ad6265SDimitry Andric 924*81ad6265SDimitry Andric // Return a VSETVLIInfo representing the changes made by this VSETVLI or 925*81ad6265SDimitry Andric // VSETIVLI instruction. 926*81ad6265SDimitry Andric static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { 927*81ad6265SDimitry Andric VSETVLIInfo NewInfo; 928*81ad6265SDimitry Andric if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { 929*81ad6265SDimitry Andric NewInfo.setAVLImm(MI.getOperand(1).getImm()); 930*81ad6265SDimitry Andric } else { 931*81ad6265SDimitry Andric assert(MI.getOpcode() == RISCV::PseudoVSETVLI || 932*81ad6265SDimitry Andric MI.getOpcode() == RISCV::PseudoVSETVLIX0); 933*81ad6265SDimitry Andric Register AVLReg = MI.getOperand(1).getReg(); 934*81ad6265SDimitry Andric assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) && 935*81ad6265SDimitry Andric "Can't handle X0, X0 vsetvli yet"); 936*81ad6265SDimitry Andric NewInfo.setAVLReg(AVLReg); 937*81ad6265SDimitry Andric } 938*81ad6265SDimitry Andric NewInfo.setVTYPE(MI.getOperand(2).getImm()); 939*81ad6265SDimitry Andric 940*81ad6265SDimitry Andric return NewInfo; 941*81ad6265SDimitry Andric } 942*81ad6265SDimitry Andric 943*81ad6265SDimitry Andric /// Return true if a VSETVLI is required to transition from CurInfo to Require 944*81ad6265SDimitry Andric /// before MI. 
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  // Require must be exactly the demand computed from MI itself.
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  // Fast path: the incoming state already satisfies MI's demand.
  if (CurInfo.isCompatible(MI, Require))
    return false;

  // No usable incoming state (invalid, unknown, or only the SEW/LMUL ratio
  // known): a vsetvli is definitely needed.
  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  // VL=0 is uninteresting (as it should have been deleted already), so it is
  // compatible if we can prove both are non-zero. Additionally, if writing
  // to an implicit_def operand, we don't need to preserve any other bits and
  // are thus compatible with any larger etype, and can disregard policy bits.
  if (isScalarMoveInstr(MI) &&
      CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) {
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW())
      return false;
    if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
      return false;
  }

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(MI, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  // Non-vector instructions (no SEW operand) don't constrain the state here.
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  // If the incoming state already satisfies MI, leave it untouched.
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  // The AVL-rewriting tricks below only apply to instructions with a VL
  // operand.
  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove it equally zero. This removes a vsetvli entirely
  // if the types match or allows use of cheaper avl preserving variant
  // if VLMAX doesn't change. If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
  // prevent extending live range of an avl register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // Two cases involving an AVL resulting from a previous vsetvli.
  // 1) If the AVL is the result of a previous vsetvli which has the
  //    same AVL and VLMAX as our current state, we can reuse the AVL
  //    from the current state for the new one.  This allows us to
  //    generate 'vsetvli x0, x0, vtype" or possible skip the transition
  //    entirely.
  // 2) If AVL is defined by a vsetvli with the same VLMAX, we can
  //    replace the AVL operand with the AVL of the defining vsetvli.
  //    We avoid general register AVLs to avoid extending live ranges
  //    without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  // case 1
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      DefInfo.hasSameAVL(PrevInfo) &&
      DefInfo.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }
  // case 2
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
  // A vset(i)vli replaces the state wholesale with what it configures.
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

// Phase 1: run the transfer functions across MBB to compute its net effect on
// VL/VTYPE (BlockData::Change). Returns true if the block contained any
// vector configuration instruction or vector operation (one with a SEW op).
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  // Start from the state known at block entry.
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

// Phase 2 worklist step: recompute MBB's entry state as the intersection of
// all predecessor exit states and, if it changed, recompute the block's exit
// state and enqueue successors.
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a blocks output state can change based on
  // the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
1155fe6060f1SDimitry Andric bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, 1156*81ad6265SDimitry Andric const MachineBasicBlock &MBB) const { 1157fe6060f1SDimitry Andric if (DisableInsertVSETVLPHIOpt) 1158fe6060f1SDimitry Andric return true; 1159fe6060f1SDimitry Andric 1160fe6060f1SDimitry Andric if (!Require.hasAVLReg()) 1161fe6060f1SDimitry Andric return true; 1162fe6060f1SDimitry Andric 1163fe6060f1SDimitry Andric Register AVLReg = Require.getAVLReg(); 1164fe6060f1SDimitry Andric if (!AVLReg.isVirtual()) 1165fe6060f1SDimitry Andric return true; 1166fe6060f1SDimitry Andric 1167fe6060f1SDimitry Andric // We need the AVL to be produce by a PHI node in this basic block. 1168fe6060f1SDimitry Andric MachineInstr *PHI = MRI->getVRegDef(AVLReg); 1169fe6060f1SDimitry Andric if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB) 1170fe6060f1SDimitry Andric return true; 1171fe6060f1SDimitry Andric 1172fe6060f1SDimitry Andric for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps; 1173fe6060f1SDimitry Andric PHIOp += 2) { 1174fe6060f1SDimitry Andric Register InReg = PHI->getOperand(PHIOp).getReg(); 1175fe6060f1SDimitry Andric MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB(); 1176fe6060f1SDimitry Andric const BlockData &PBBInfo = BlockInfo[PBB->getNumber()]; 1177fe6060f1SDimitry Andric // If the exit from the predecessor has the VTYPE we are looking for 1178fe6060f1SDimitry Andric // we might be able to avoid a VSETVLI. 1179*81ad6265SDimitry Andric if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require)) 1180fe6060f1SDimitry Andric return true; 1181fe6060f1SDimitry Andric 1182fe6060f1SDimitry Andric // We need the PHI input to the be the output of a VSET(I)VLI. 
1183fe6060f1SDimitry Andric MachineInstr *DefMI = MRI->getVRegDef(InReg); 1184*81ad6265SDimitry Andric if (!DefMI || !isVectorConfigInstr(*DefMI)) 1185fe6060f1SDimitry Andric return true; 1186fe6060f1SDimitry Andric 1187fe6060f1SDimitry Andric // We found a VSET(I)VLI make sure it matches the output of the 1188fe6060f1SDimitry Andric // predecessor block. 1189fe6060f1SDimitry Andric VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); 1190fe6060f1SDimitry Andric if (!DefInfo.hasSameAVL(PBBInfo.Exit) || 1191fe6060f1SDimitry Andric !DefInfo.hasSameVTYPE(PBBInfo.Exit)) 1192fe6060f1SDimitry Andric return true; 1193fe6060f1SDimitry Andric } 1194fe6060f1SDimitry Andric 1195fe6060f1SDimitry Andric // If all the incoming values to the PHI checked out, we don't need 1196fe6060f1SDimitry Andric // to insert a VSETVLI. 1197fe6060f1SDimitry Andric return false; 1198fe6060f1SDimitry Andric } 1199fe6060f1SDimitry Andric 1200fe6060f1SDimitry Andric void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { 1201*81ad6265SDimitry Andric VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred; 1202*81ad6265SDimitry Andric // Track whether the prefix of the block we've scanned is transparent 1203*81ad6265SDimitry Andric // (meaning has not yet changed the abstract state). 1204*81ad6265SDimitry Andric bool PrefixTransparent = true; 1205fe6060f1SDimitry Andric for (MachineInstr &MI : MBB) { 1206*81ad6265SDimitry Andric const VSETVLIInfo PrevInfo = CurInfo; 1207*81ad6265SDimitry Andric transferBefore(CurInfo, MI); 1208*81ad6265SDimitry Andric 1209fe6060f1SDimitry Andric // If this is an explicit VSETVLI or VSETIVLI, update our state. 1210*81ad6265SDimitry Andric if (isVectorConfigInstr(MI)) { 1211fe6060f1SDimitry Andric // Conservatively, mark the VL and VTYPE as live. 
1212fe6060f1SDimitry Andric assert(MI.getOperand(3).getReg() == RISCV::VL && 1213fe6060f1SDimitry Andric MI.getOperand(4).getReg() == RISCV::VTYPE && 1214fe6060f1SDimitry Andric "Unexpected operands where VL and VTYPE should be"); 1215fe6060f1SDimitry Andric MI.getOperand(3).setIsDead(false); 1216fe6060f1SDimitry Andric MI.getOperand(4).setIsDead(false); 1217*81ad6265SDimitry Andric PrefixTransparent = false; 1218fe6060f1SDimitry Andric } 1219fe6060f1SDimitry Andric 1220fe6060f1SDimitry Andric uint64_t TSFlags = MI.getDesc().TSFlags; 1221fe6060f1SDimitry Andric if (RISCVII::hasSEWOp(TSFlags)) { 1222*81ad6265SDimitry Andric if (PrevInfo != CurInfo) { 1223*81ad6265SDimitry Andric // If this is the first implicit state change, and the state change 1224*81ad6265SDimitry Andric // requested can be proven to produce the same register contents, we 1225*81ad6265SDimitry Andric // can skip emitting the actual state change and continue as if we 1226*81ad6265SDimitry Andric // had since we know the GPR result of the implicit state change 1227*81ad6265SDimitry Andric // wouldn't be used and VL/VTYPE registers are correct. Note that 1228*81ad6265SDimitry Andric // we *do* need to model the state as if it changed as while the 1229*81ad6265SDimitry Andric // register contents are unchanged, the abstract model can change. 1230*81ad6265SDimitry Andric if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB)) 1231*81ad6265SDimitry Andric insertVSETVLI(MBB, MI, CurInfo, PrevInfo); 1232*81ad6265SDimitry Andric PrefixTransparent = false; 1233*81ad6265SDimitry Andric } 1234*81ad6265SDimitry Andric 1235fe6060f1SDimitry Andric if (RISCVII::hasVLOp(TSFlags)) { 1236*81ad6265SDimitry Andric MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); 1237fe6060f1SDimitry Andric if (VLOp.isReg()) { 1238fe6060f1SDimitry Andric // Erase the AVL operand from the instruction. 
1239fe6060f1SDimitry Andric VLOp.setReg(RISCV::NoRegister); 1240fe6060f1SDimitry Andric VLOp.setIsKill(false); 1241fe6060f1SDimitry Andric } 1242fe6060f1SDimitry Andric MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, 1243fe6060f1SDimitry Andric /*isImp*/ true)); 1244fe6060f1SDimitry Andric } 1245fe6060f1SDimitry Andric MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false, 1246fe6060f1SDimitry Andric /*isImp*/ true)); 1247fe6060f1SDimitry Andric } 1248fe6060f1SDimitry Andric 1249fe6060f1SDimitry Andric if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || 1250*81ad6265SDimitry Andric MI.modifiesRegister(RISCV::VTYPE)) 1251*81ad6265SDimitry Andric PrefixTransparent = false; 1252*81ad6265SDimitry Andric 1253*81ad6265SDimitry Andric transferAfter(CurInfo, MI); 1254fe6060f1SDimitry Andric } 1255d56accc7SDimitry Andric 1256d56accc7SDimitry Andric // If we reach the end of the block and our current info doesn't match the 1257d56accc7SDimitry Andric // expected info, insert a vsetvli to correct. 1258*81ad6265SDimitry Andric if (!UseStrictAsserts) { 1259d56accc7SDimitry Andric const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit; 1260d56accc7SDimitry Andric if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() && 1261d56accc7SDimitry Andric CurInfo != ExitInfo) { 1262*81ad6265SDimitry Andric // Note there's an implicit assumption here that terminators never use 1263*81ad6265SDimitry Andric // or modify VL or VTYPE. Also, fallthrough will return end(). 
1264*81ad6265SDimitry Andric auto InsertPt = MBB.getFirstInstrTerminator(); 1265*81ad6265SDimitry Andric insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo, 1266*81ad6265SDimitry Andric CurInfo); 1267d56accc7SDimitry Andric CurInfo = ExitInfo; 1268d56accc7SDimitry Andric } 1269d56accc7SDimitry Andric } 1270*81ad6265SDimitry Andric 1271*81ad6265SDimitry Andric if (UseStrictAsserts && CurInfo.isValid()) { 1272*81ad6265SDimitry Andric const auto &Info = BlockInfo[MBB.getNumber()]; 1273*81ad6265SDimitry Andric if (CurInfo != Info.Exit) { 1274*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n"); 1275*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n"); 1276*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n"); 1277*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n"); 1278*81ad6265SDimitry Andric } 1279*81ad6265SDimitry Andric assert(CurInfo == Info.Exit && 1280*81ad6265SDimitry Andric "InsertVSETVLI dataflow invariant violated"); 1281*81ad6265SDimitry Andric } 1282*81ad6265SDimitry Andric } 1283*81ad6265SDimitry Andric 1284*81ad6265SDimitry Andric /// Return true if the VL value configured must be equal to the requested one. 1285*81ad6265SDimitry Andric static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) { 1286*81ad6265SDimitry Andric if (!Info.hasAVLImm()) 1287*81ad6265SDimitry Andric // VLMAX is always the same value. 1288*81ad6265SDimitry Andric // TODO: Could extend to other registers by looking at the associated vreg 1289*81ad6265SDimitry Andric // def placement. 
1290*81ad6265SDimitry Andric return RISCV::X0 == Info.getAVLReg(); 1291*81ad6265SDimitry Andric 1292*81ad6265SDimitry Andric unsigned AVL = Info.getAVLImm(); 1293*81ad6265SDimitry Andric unsigned SEW = Info.getSEW(); 1294*81ad6265SDimitry Andric unsigned AVLInBits = AVL * SEW; 1295*81ad6265SDimitry Andric 1296*81ad6265SDimitry Andric unsigned LMul; 1297*81ad6265SDimitry Andric bool Fractional; 1298*81ad6265SDimitry Andric std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL()); 1299*81ad6265SDimitry Andric 1300*81ad6265SDimitry Andric if (Fractional) 1301*81ad6265SDimitry Andric return ST.getRealMinVLen() / LMul >= AVLInBits; 1302*81ad6265SDimitry Andric return ST.getRealMinVLen() * LMul >= AVLInBits; 1303*81ad6265SDimitry Andric } 1304*81ad6265SDimitry Andric 1305*81ad6265SDimitry Andric /// Perform simple partial redundancy elimination of the VSETVLI instructions 1306*81ad6265SDimitry Andric /// we're about to insert by looking for cases where we can PRE from the 1307*81ad6265SDimitry Andric /// beginning of one block to the end of one of its predecessors. Specifically, 1308*81ad6265SDimitry Andric /// this is geared to catch the common case of a fixed length vsetvl in a single 1309*81ad6265SDimitry Andric /// block loop when it could execute once in the preheader instead. 
1310*81ad6265SDimitry Andric void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { 1311*81ad6265SDimitry Andric const MachineFunction &MF = *MBB.getParent(); 1312*81ad6265SDimitry Andric const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 1313*81ad6265SDimitry Andric 1314*81ad6265SDimitry Andric if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) 1315*81ad6265SDimitry Andric return; 1316*81ad6265SDimitry Andric 1317*81ad6265SDimitry Andric MachineBasicBlock *UnavailablePred = nullptr; 1318*81ad6265SDimitry Andric VSETVLIInfo AvailableInfo; 1319*81ad6265SDimitry Andric for (MachineBasicBlock *P : MBB.predecessors()) { 1320*81ad6265SDimitry Andric const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit; 1321*81ad6265SDimitry Andric if (PredInfo.isUnknown()) { 1322*81ad6265SDimitry Andric if (UnavailablePred) 1323*81ad6265SDimitry Andric return; 1324*81ad6265SDimitry Andric UnavailablePred = P; 1325*81ad6265SDimitry Andric } else if (!AvailableInfo.isValid()) { 1326*81ad6265SDimitry Andric AvailableInfo = PredInfo; 1327*81ad6265SDimitry Andric } else if (AvailableInfo != PredInfo) { 1328*81ad6265SDimitry Andric return; 1329*81ad6265SDimitry Andric } 1330*81ad6265SDimitry Andric } 1331*81ad6265SDimitry Andric 1332*81ad6265SDimitry Andric // Unreachable, single pred, or full redundancy. Note that FRE is handled by 1333*81ad6265SDimitry Andric // phase 3. 1334*81ad6265SDimitry Andric if (!UnavailablePred || !AvailableInfo.isValid()) 1335*81ad6265SDimitry Andric return; 1336*81ad6265SDimitry Andric 1337*81ad6265SDimitry Andric // Critical edge - TODO: consider splitting? 1338*81ad6265SDimitry Andric if (UnavailablePred->succ_size() != 1) 1339*81ad6265SDimitry Andric return; 1340*81ad6265SDimitry Andric 1341*81ad6265SDimitry Andric // If VL can be less than AVL, then we can't reduce the frequency of exec. 
1342*81ad6265SDimitry Andric if (!hasFixedResult(AvailableInfo, ST)) 1343*81ad6265SDimitry Andric return; 1344*81ad6265SDimitry Andric 1345*81ad6265SDimitry Andric // Does it actually let us remove an implicit transition in MBB? 1346*81ad6265SDimitry Andric bool Found = false; 1347*81ad6265SDimitry Andric for (auto &MI : MBB) { 1348*81ad6265SDimitry Andric if (isVectorConfigInstr(MI)) 1349*81ad6265SDimitry Andric return; 1350*81ad6265SDimitry Andric 1351*81ad6265SDimitry Andric const uint64_t TSFlags = MI.getDesc().TSFlags; 1352*81ad6265SDimitry Andric if (RISCVII::hasSEWOp(TSFlags)) { 1353*81ad6265SDimitry Andric if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI)) 1354*81ad6265SDimitry Andric return; 1355*81ad6265SDimitry Andric Found = true; 1356*81ad6265SDimitry Andric break; 1357*81ad6265SDimitry Andric } 1358*81ad6265SDimitry Andric } 1359*81ad6265SDimitry Andric if (!Found) 1360*81ad6265SDimitry Andric return; 1361*81ad6265SDimitry Andric 1362*81ad6265SDimitry Andric // Finally, update both data flow state and insert the actual vsetvli. 1363*81ad6265SDimitry Andric // Doing both keeps the code in sync with the dataflow results, which 1364*81ad6265SDimitry Andric // is critical for correctness of phase 3. 1365*81ad6265SDimitry Andric auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit; 1366*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " 1367*81ad6265SDimitry Andric << UnavailablePred->getName() << " with state " 1368*81ad6265SDimitry Andric << AvailableInfo << "\n"); 1369*81ad6265SDimitry Andric BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo; 1370*81ad6265SDimitry Andric BlockInfo[MBB.getNumber()].Pred = AvailableInfo; 1371*81ad6265SDimitry Andric 1372*81ad6265SDimitry Andric // Note there's an implicit assumption here that terminators never use 1373*81ad6265SDimitry Andric // or modify VL or VTYPE. Also, fallthrough will return end(). 
1374*81ad6265SDimitry Andric auto InsertPt = UnavailablePred->getFirstInstrTerminator(); 1375*81ad6265SDimitry Andric insertVSETVLI(*UnavailablePred, InsertPt, 1376*81ad6265SDimitry Andric UnavailablePred->findDebugLoc(InsertPt), 1377*81ad6265SDimitry Andric AvailableInfo, OldInfo); 1378*81ad6265SDimitry Andric } 1379*81ad6265SDimitry Andric 1380*81ad6265SDimitry Andric static void doUnion(DemandedFields &A, DemandedFields B) { 1381*81ad6265SDimitry Andric A.VL |= B.VL; 1382*81ad6265SDimitry Andric A.SEW |= B.SEW; 1383*81ad6265SDimitry Andric A.LMUL |= B.LMUL; 1384*81ad6265SDimitry Andric A.SEWLMULRatio |= B.SEWLMULRatio; 1385*81ad6265SDimitry Andric A.TailPolicy |= B.TailPolicy; 1386*81ad6265SDimitry Andric A.MaskPolicy |= B.MaskPolicy; 1387*81ad6265SDimitry Andric } 1388*81ad6265SDimitry Andric 1389*81ad6265SDimitry Andric // Return true if we can mutate PrevMI's VTYPE to match MI's 1390*81ad6265SDimitry Andric // without changing any the fields which have been used. 1391*81ad6265SDimitry Andric // TODO: Restructure code to allow code reuse between this and isCompatible 1392*81ad6265SDimitry Andric // above. 1393*81ad6265SDimitry Andric static bool canMutatePriorConfig(const MachineInstr &PrevMI, 1394*81ad6265SDimitry Andric const MachineInstr &MI, 1395*81ad6265SDimitry Andric const DemandedFields &Used) { 1396*81ad6265SDimitry Andric // TODO: Extend this to handle cases where VL does change, but VL 1397*81ad6265SDimitry Andric // has not been used. (e.g. over a vmv.x.s) 1398*81ad6265SDimitry Andric if (!isVLPreservingConfig(MI)) 1399*81ad6265SDimitry Andric // Note: `vsetvli x0, x0, vtype' is the canonical instruction 1400*81ad6265SDimitry Andric // for this case. If you find yourself wanting to add other forms 1401*81ad6265SDimitry Andric // to this "unused VTYPE" case, we're probably missing a 1402*81ad6265SDimitry Andric // canonicalization earlier. 
1403*81ad6265SDimitry Andric return false; 1404*81ad6265SDimitry Andric 1405*81ad6265SDimitry Andric if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm()) 1406*81ad6265SDimitry Andric return false; 1407*81ad6265SDimitry Andric 1408*81ad6265SDimitry Andric auto PriorVType = PrevMI.getOperand(2).getImm(); 1409*81ad6265SDimitry Andric auto VType = MI.getOperand(2).getImm(); 1410*81ad6265SDimitry Andric return areCompatibleVTYPEs(PriorVType, VType, Used); 1411*81ad6265SDimitry Andric } 1412*81ad6265SDimitry Andric 1413*81ad6265SDimitry Andric void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { 1414*81ad6265SDimitry Andric MachineInstr *PrevMI = nullptr; 1415*81ad6265SDimitry Andric DemandedFields Used; 1416*81ad6265SDimitry Andric SmallVector<MachineInstr*> ToDelete; 1417*81ad6265SDimitry Andric for (MachineInstr &MI : MBB) { 1418*81ad6265SDimitry Andric // Note: Must be *before* vsetvli handling to account for config cases 1419*81ad6265SDimitry Andric // which only change some subfields. 
1420*81ad6265SDimitry Andric doUnion(Used, getDemanded(MI)); 1421*81ad6265SDimitry Andric 1422*81ad6265SDimitry Andric if (!isVectorConfigInstr(MI)) 1423*81ad6265SDimitry Andric continue; 1424*81ad6265SDimitry Andric 1425*81ad6265SDimitry Andric if (PrevMI) { 1426*81ad6265SDimitry Andric if (!Used.VL && !Used.usedVTYPE()) { 1427*81ad6265SDimitry Andric ToDelete.push_back(PrevMI); 1428*81ad6265SDimitry Andric // fallthrough 1429*81ad6265SDimitry Andric } else if (canMutatePriorConfig(*PrevMI, MI, Used)) { 1430*81ad6265SDimitry Andric PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm()); 1431*81ad6265SDimitry Andric ToDelete.push_back(&MI); 1432*81ad6265SDimitry Andric // Leave PrevMI unchanged 1433*81ad6265SDimitry Andric continue; 1434*81ad6265SDimitry Andric } 1435*81ad6265SDimitry Andric } 1436*81ad6265SDimitry Andric PrevMI = &MI; 1437*81ad6265SDimitry Andric Used = getDemanded(MI); 1438*81ad6265SDimitry Andric Register VRegDef = MI.getOperand(0).getReg(); 1439*81ad6265SDimitry Andric if (VRegDef != RISCV::X0 && 1440*81ad6265SDimitry Andric !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) 1441*81ad6265SDimitry Andric Used.VL = true; 1442*81ad6265SDimitry Andric } 1443*81ad6265SDimitry Andric 1444*81ad6265SDimitry Andric for (auto *MI : ToDelete) 1445*81ad6265SDimitry Andric MI->eraseFromParent(); 1446*81ad6265SDimitry Andric } 1447*81ad6265SDimitry Andric 1448*81ad6265SDimitry Andric void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) { 1449*81ad6265SDimitry Andric for (auto I = MBB.begin(), E = MBB.end(); I != E;) { 1450*81ad6265SDimitry Andric MachineInstr &MI = *I++; 1451*81ad6265SDimitry Andric if (RISCV::isFaultFirstLoad(MI)) { 1452*81ad6265SDimitry Andric Register VLOutput = MI.getOperand(1).getReg(); 1453*81ad6265SDimitry Andric if (!MRI->use_nodbg_empty(VLOutput)) 1454*81ad6265SDimitry Andric BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL), 1455*81ad6265SDimitry Andric VLOutput); 1456*81ad6265SDimitry Andric // We 
don't use the vl output of the VLEFF/VLSEGFF anymore. 1457*81ad6265SDimitry Andric MI.getOperand(1).setReg(RISCV::X0); 1458*81ad6265SDimitry Andric } 1459fe6060f1SDimitry Andric } 1460fe6060f1SDimitry Andric } 1461fe6060f1SDimitry Andric 1462fe6060f1SDimitry Andric bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { 1463fe6060f1SDimitry Andric // Skip if the vector extension is not enabled. 1464fe6060f1SDimitry Andric const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 1465349cc55cSDimitry Andric if (!ST.hasVInstructions()) 1466fe6060f1SDimitry Andric return false; 1467fe6060f1SDimitry Andric 1468*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n"); 1469*81ad6265SDimitry Andric 1470fe6060f1SDimitry Andric TII = ST.getInstrInfo(); 1471fe6060f1SDimitry Andric MRI = &MF.getRegInfo(); 1472fe6060f1SDimitry Andric 1473fe6060f1SDimitry Andric assert(BlockInfo.empty() && "Expect empty block infos"); 1474fe6060f1SDimitry Andric BlockInfo.resize(MF.getNumBlockIDs()); 1475fe6060f1SDimitry Andric 1476fe6060f1SDimitry Andric bool HaveVectorOp = false; 1477fe6060f1SDimitry Andric 1478fe6060f1SDimitry Andric // Phase 1 - determine how VL/VTYPE are affected by the each block. 1479*81ad6265SDimitry Andric for (const MachineBasicBlock &MBB : MF) { 1480fe6060f1SDimitry Andric HaveVectorOp |= computeVLVTYPEChanges(MBB); 1481*81ad6265SDimitry Andric // Initial exit state is whatever change we found in the block. 1482*81ad6265SDimitry Andric BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 1483*81ad6265SDimitry Andric BBInfo.Exit = BBInfo.Change; 1484*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB) 1485*81ad6265SDimitry Andric << " is " << BBInfo.Exit << "\n"); 1486*81ad6265SDimitry Andric 1487*81ad6265SDimitry Andric } 1488fe6060f1SDimitry Andric 1489fe6060f1SDimitry Andric // If we didn't find any instructions that need VSETVLI, we're done. 
1490*81ad6265SDimitry Andric if (!HaveVectorOp) { 1491*81ad6265SDimitry Andric BlockInfo.clear(); 1492*81ad6265SDimitry Andric return false; 1493*81ad6265SDimitry Andric } 1494*81ad6265SDimitry Andric 1495fe6060f1SDimitry Andric // Phase 2 - determine the exit VL/VTYPE from each block. We add all 1496fe6060f1SDimitry Andric // blocks to the list here, but will also add any that need to be revisited 1497fe6060f1SDimitry Andric // during Phase 2 processing. 1498fe6060f1SDimitry Andric for (const MachineBasicBlock &MBB : MF) { 1499fe6060f1SDimitry Andric WorkList.push(&MBB); 1500fe6060f1SDimitry Andric BlockInfo[MBB.getNumber()].InQueue = true; 1501fe6060f1SDimitry Andric } 1502fe6060f1SDimitry Andric while (!WorkList.empty()) { 1503fe6060f1SDimitry Andric const MachineBasicBlock &MBB = *WorkList.front(); 1504fe6060f1SDimitry Andric WorkList.pop(); 1505fe6060f1SDimitry Andric computeIncomingVLVTYPE(MBB); 1506fe6060f1SDimitry Andric } 1507fe6060f1SDimitry Andric 1508*81ad6265SDimitry Andric // Perform partial redundancy elimination of vsetvli transitions. 1509*81ad6265SDimitry Andric for (MachineBasicBlock &MBB : MF) 1510*81ad6265SDimitry Andric doPRE(MBB); 1511*81ad6265SDimitry Andric 1512fe6060f1SDimitry Andric // Phase 3 - add any vsetvli instructions needed in the block. Use the 1513fe6060f1SDimitry Andric // Phase 2 information to avoid adding vsetvlis before the first vector 1514fe6060f1SDimitry Andric // instruction in the block if the VL/VTYPE is satisfied by its 1515fe6060f1SDimitry Andric // predecessors. 1516fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) 1517fe6060f1SDimitry Andric emitVSETVLIs(MBB); 1518*81ad6265SDimitry Andric 1519*81ad6265SDimitry Andric // Now that all vsetvlis are explicit, go through and do block local 1520*81ad6265SDimitry Andric // DSE and peephole based demanded fields based transforms. 
Note that 1521*81ad6265SDimitry Andric // this *must* be done outside the main dataflow so long as we allow 1522*81ad6265SDimitry Andric // any cross block analysis within the dataflow. We can't have both 1523*81ad6265SDimitry Andric // demanded fields based mutation and non-local analysis in the 1524*81ad6265SDimitry Andric // dataflow at the same time without introducing inconsistencies. 1525*81ad6265SDimitry Andric for (MachineBasicBlock &MBB : MF) 1526*81ad6265SDimitry Andric doLocalPostpass(MBB); 1527*81ad6265SDimitry Andric 1528*81ad6265SDimitry Andric // Once we're fully done rewriting all the instructions, do a final pass 1529*81ad6265SDimitry Andric // through to check for VSETVLIs which write to an unused destination. 1530*81ad6265SDimitry Andric // For the non X0, X0 variant, we can replace the destination register 1531*81ad6265SDimitry Andric // with X0 to reduce register pressure. This is really a generic 1532*81ad6265SDimitry Andric // optimization which can be applied to any dead def (TODO: generalize). 1533*81ad6265SDimitry Andric for (MachineBasicBlock &MBB : MF) { 1534*81ad6265SDimitry Andric for (MachineInstr &MI : MBB) { 1535*81ad6265SDimitry Andric if (MI.getOpcode() == RISCV::PseudoVSETVLI || 1536*81ad6265SDimitry Andric MI.getOpcode() == RISCV::PseudoVSETIVLI) { 1537*81ad6265SDimitry Andric Register VRegDef = MI.getOperand(0).getReg(); 1538*81ad6265SDimitry Andric if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef)) 1539*81ad6265SDimitry Andric MI.getOperand(0).setReg(RISCV::X0); 1540*81ad6265SDimitry Andric } 1541*81ad6265SDimitry Andric } 1542fe6060f1SDimitry Andric } 1543fe6060f1SDimitry Andric 1544*81ad6265SDimitry Andric // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output 1545*81ad6265SDimitry Andric // of VLEFF/VLSEGFF. 
1546*81ad6265SDimitry Andric for (MachineBasicBlock &MBB : MF) 1547*81ad6265SDimitry Andric insertReadVL(MBB); 1548fe6060f1SDimitry Andric 1549*81ad6265SDimitry Andric BlockInfo.clear(); 1550fe6060f1SDimitry Andric return HaveVectorOp; 1551fe6060f1SDimitry Andric } 1552fe6060f1SDimitry Andric 1553fe6060f1SDimitry Andric /// Returns an instance of the Insert VSETVLI pass. 1554fe6060f1SDimitry Andric FunctionPass *llvm::createRISCVInsertVSETVLIPass() { 1555fe6060f1SDimitry Andric return new RISCVInsertVSETVLI(); 1556fe6060f1SDimitry Andric } 1557