//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass does some optimizations for *W instructions at the MI level.
//
// First it removes unneeded sext(addi.w rd, rs, 0) instructions, either
// because the sign extended bits aren't consumed or because the input was
// already sign extended by an earlier instruction.
//
// Then:
//   1. Unless explicitly disabled or the target prefers instructions with W
//      suffix, it removes the -w suffix from opw instructions whenever all
//      users are dependent only on the lower word of the result of the
//      instruction.
//      The cases handled are:
//        * addi.w because it helps reduce test differences between LA32 and
//          LA64 w/o being a pessimization.
//
//   2. Or, if explicitly enabled or the target prefers instructions with W
//      suffix, it adds the W suffix to the instruction whenever all users are
//      dependent only on the lower word of the result of the instruction.
//      The cases handled are:
//        * add.d/addi.d/sub.d/mul.d.
280fca6ea1SDimitry Andric // * slli.d with imm < 32. 290fca6ea1SDimitry Andric // * ld.d/ld.wu. 300fca6ea1SDimitry Andric //===---------------------------------------------------------------------===// 310fca6ea1SDimitry Andric 320fca6ea1SDimitry Andric #include "LoongArch.h" 330fca6ea1SDimitry Andric #include "LoongArchMachineFunctionInfo.h" 340fca6ea1SDimitry Andric #include "LoongArchSubtarget.h" 350fca6ea1SDimitry Andric #include "llvm/ADT/SmallSet.h" 360fca6ea1SDimitry Andric #include "llvm/ADT/Statistic.h" 370fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 380fca6ea1SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 390fca6ea1SDimitry Andric 400fca6ea1SDimitry Andric using namespace llvm; 410fca6ea1SDimitry Andric 420fca6ea1SDimitry Andric #define DEBUG_TYPE "loongarch-opt-w-instrs" 430fca6ea1SDimitry Andric #define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions" 440fca6ea1SDimitry Andric 450fca6ea1SDimitry Andric STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions"); 460fca6ea1SDimitry Andric STATISTIC(NumTransformedToWInstrs, 470fca6ea1SDimitry Andric "Number of instructions transformed to W-ops"); 480fca6ea1SDimitry Andric 490fca6ea1SDimitry Andric static cl::opt<bool> 500fca6ea1SDimitry Andric DisableSExtWRemoval("loongarch-disable-sextw-removal", 510fca6ea1SDimitry Andric cl::desc("Disable removal of sign-extend insn"), 520fca6ea1SDimitry Andric cl::init(false), cl::Hidden); 530fca6ea1SDimitry Andric static cl::opt<bool> 540fca6ea1SDimitry Andric DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix", 550fca6ea1SDimitry Andric cl::desc("Disable convert to D suffix"), 560fca6ea1SDimitry Andric cl::init(false), cl::Hidden); 570fca6ea1SDimitry Andric 580fca6ea1SDimitry Andric namespace { 590fca6ea1SDimitry Andric 600fca6ea1SDimitry Andric class LoongArchOptWInstrs : public MachineFunctionPass { 610fca6ea1SDimitry Andric public: 620fca6ea1SDimitry Andric static char ID; 630fca6ea1SDimitry Andric 
640fca6ea1SDimitry Andric LoongArchOptWInstrs() : MachineFunctionPass(ID) {} 650fca6ea1SDimitry Andric 660fca6ea1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 670fca6ea1SDimitry Andric bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII, 680fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 690fca6ea1SDimitry Andric MachineRegisterInfo &MRI); 700fca6ea1SDimitry Andric bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII, 710fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 720fca6ea1SDimitry Andric MachineRegisterInfo &MRI); 730fca6ea1SDimitry Andric bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII, 740fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 750fca6ea1SDimitry Andric MachineRegisterInfo &MRI); 760fca6ea1SDimitry Andric 770fca6ea1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 780fca6ea1SDimitry Andric AU.setPreservesCFG(); 790fca6ea1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 800fca6ea1SDimitry Andric } 810fca6ea1SDimitry Andric 820fca6ea1SDimitry Andric StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; } 830fca6ea1SDimitry Andric }; 840fca6ea1SDimitry Andric 850fca6ea1SDimitry Andric } // end anonymous namespace 860fca6ea1SDimitry Andric 870fca6ea1SDimitry Andric char LoongArchOptWInstrs::ID = 0; 880fca6ea1SDimitry Andric INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME, 890fca6ea1SDimitry Andric false, false) 900fca6ea1SDimitry Andric 910fca6ea1SDimitry Andric FunctionPass *llvm::createLoongArchOptWInstrsPass() { 920fca6ea1SDimitry Andric return new LoongArchOptWInstrs(); 930fca6ea1SDimitry Andric } 940fca6ea1SDimitry Andric 950fca6ea1SDimitry Andric // Checks if all users only demand the lower \p OrigBits of the original 960fca6ea1SDimitry Andric // instruction's result. 
970fca6ea1SDimitry Andric // TODO: handle multiple interdependent transformations 980fca6ea1SDimitry Andric static bool hasAllNBitUsers(const MachineInstr &OrigMI, 990fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 1000fca6ea1SDimitry Andric const MachineRegisterInfo &MRI, unsigned OrigBits) { 1010fca6ea1SDimitry Andric 1020fca6ea1SDimitry Andric SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited; 1030fca6ea1SDimitry Andric SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist; 1040fca6ea1SDimitry Andric 1050fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(&OrigMI, OrigBits)); 1060fca6ea1SDimitry Andric 1070fca6ea1SDimitry Andric while (!Worklist.empty()) { 1080fca6ea1SDimitry Andric auto P = Worklist.pop_back_val(); 1090fca6ea1SDimitry Andric const MachineInstr *MI = P.first; 1100fca6ea1SDimitry Andric unsigned Bits = P.second; 1110fca6ea1SDimitry Andric 1120fca6ea1SDimitry Andric if (!Visited.insert(P).second) 1130fca6ea1SDimitry Andric continue; 1140fca6ea1SDimitry Andric 1150fca6ea1SDimitry Andric // Only handle instructions with one def. 
1160fca6ea1SDimitry Andric if (MI->getNumExplicitDefs() != 1) 1170fca6ea1SDimitry Andric return false; 1180fca6ea1SDimitry Andric 1190fca6ea1SDimitry Andric Register DestReg = MI->getOperand(0).getReg(); 1200fca6ea1SDimitry Andric if (!DestReg.isVirtual()) 1210fca6ea1SDimitry Andric return false; 1220fca6ea1SDimitry Andric 1230fca6ea1SDimitry Andric for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) { 1240fca6ea1SDimitry Andric const MachineInstr *UserMI = UserOp.getParent(); 1250fca6ea1SDimitry Andric unsigned OpIdx = UserOp.getOperandNo(); 1260fca6ea1SDimitry Andric 1270fca6ea1SDimitry Andric switch (UserMI->getOpcode()) { 1280fca6ea1SDimitry Andric default: 1290fca6ea1SDimitry Andric // TODO: Add vector 1300fca6ea1SDimitry Andric return false; 1310fca6ea1SDimitry Andric 1320fca6ea1SDimitry Andric case LoongArch::ADD_W: 1330fca6ea1SDimitry Andric case LoongArch::ADDI_W: 1340fca6ea1SDimitry Andric case LoongArch::SUB_W: 1350fca6ea1SDimitry Andric case LoongArch::ALSL_W: 1360fca6ea1SDimitry Andric case LoongArch::ALSL_WU: 1370fca6ea1SDimitry Andric case LoongArch::MUL_W: 1380fca6ea1SDimitry Andric case LoongArch::MULH_W: 1390fca6ea1SDimitry Andric case LoongArch::MULH_WU: 1400fca6ea1SDimitry Andric case LoongArch::MULW_D_W: 1410fca6ea1SDimitry Andric case LoongArch::MULW_D_WU: 1420fca6ea1SDimitry Andric // TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+. 
1430fca6ea1SDimitry Andric // case LoongArch::DIV_W: 1440fca6ea1SDimitry Andric // case LoongArch::DIV_WU: 1450fca6ea1SDimitry Andric // case LoongArch::MOD_W: 1460fca6ea1SDimitry Andric // case LoongArch::MOD_WU: 1470fca6ea1SDimitry Andric case LoongArch::SLL_W: 1480fca6ea1SDimitry Andric case LoongArch::SLLI_W: 1490fca6ea1SDimitry Andric case LoongArch::SRL_W: 1500fca6ea1SDimitry Andric case LoongArch::SRLI_W: 1510fca6ea1SDimitry Andric case LoongArch::SRA_W: 1520fca6ea1SDimitry Andric case LoongArch::SRAI_W: 1530fca6ea1SDimitry Andric case LoongArch::ROTR_W: 1540fca6ea1SDimitry Andric case LoongArch::ROTRI_W: 1550fca6ea1SDimitry Andric case LoongArch::CLO_W: 1560fca6ea1SDimitry Andric case LoongArch::CLZ_W: 1570fca6ea1SDimitry Andric case LoongArch::CTO_W: 1580fca6ea1SDimitry Andric case LoongArch::CTZ_W: 1590fca6ea1SDimitry Andric case LoongArch::BYTEPICK_W: 1600fca6ea1SDimitry Andric case LoongArch::REVB_2H: 1610fca6ea1SDimitry Andric case LoongArch::BITREV_4B: 1620fca6ea1SDimitry Andric case LoongArch::BITREV_W: 1630fca6ea1SDimitry Andric case LoongArch::BSTRINS_W: 1640fca6ea1SDimitry Andric case LoongArch::BSTRPICK_W: 1650fca6ea1SDimitry Andric case LoongArch::CRC_W_W_W: 1660fca6ea1SDimitry Andric case LoongArch::CRCC_W_W_W: 1670fca6ea1SDimitry Andric case LoongArch::MOVGR2FCSR: 1680fca6ea1SDimitry Andric case LoongArch::MOVGR2FRH_W: 1690fca6ea1SDimitry Andric case LoongArch::MOVGR2FR_W_64: 1700fca6ea1SDimitry Andric if (Bits >= 32) 1710fca6ea1SDimitry Andric break; 1720fca6ea1SDimitry Andric return false; 1730fca6ea1SDimitry Andric case LoongArch::MOVGR2CF: 1740fca6ea1SDimitry Andric if (Bits >= 1) 1750fca6ea1SDimitry Andric break; 1760fca6ea1SDimitry Andric return false; 1770fca6ea1SDimitry Andric case LoongArch::EXT_W_B: 1780fca6ea1SDimitry Andric if (Bits >= 8) 1790fca6ea1SDimitry Andric break; 1800fca6ea1SDimitry Andric return false; 1810fca6ea1SDimitry Andric case LoongArch::EXT_W_H: 1820fca6ea1SDimitry Andric if (Bits >= 16) 1830fca6ea1SDimitry Andric 
break; 1840fca6ea1SDimitry Andric return false; 1850fca6ea1SDimitry Andric 1860fca6ea1SDimitry Andric case LoongArch::SRLI_D: { 1870fca6ea1SDimitry Andric // If we are shifting right by less than Bits, and users don't demand 1880fca6ea1SDimitry Andric // any bits that were shifted into [Bits-1:0], then we can consider this 1890fca6ea1SDimitry Andric // as an N-Bit user. 1900fca6ea1SDimitry Andric unsigned ShAmt = UserMI->getOperand(2).getImm(); 1910fca6ea1SDimitry Andric if (Bits > ShAmt) { 1920fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt)); 1930fca6ea1SDimitry Andric break; 1940fca6ea1SDimitry Andric } 1950fca6ea1SDimitry Andric return false; 1960fca6ea1SDimitry Andric } 1970fca6ea1SDimitry Andric 1980fca6ea1SDimitry Andric // these overwrite higher input bits, otherwise the lower word of output 1990fca6ea1SDimitry Andric // depends only on the lower word of input. So check their uses read W. 2000fca6ea1SDimitry Andric case LoongArch::SLLI_D: 2010fca6ea1SDimitry Andric if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm())) 2020fca6ea1SDimitry Andric break; 2030fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 2040fca6ea1SDimitry Andric break; 2050fca6ea1SDimitry Andric case LoongArch::ANDI: { 2060fca6ea1SDimitry Andric uint64_t Imm = UserMI->getOperand(2).getImm(); 2070fca6ea1SDimitry Andric if (Bits >= (unsigned)llvm::bit_width(Imm)) 2080fca6ea1SDimitry Andric break; 2090fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 2100fca6ea1SDimitry Andric break; 2110fca6ea1SDimitry Andric } 2120fca6ea1SDimitry Andric case LoongArch::ORI: { 2130fca6ea1SDimitry Andric uint64_t Imm = UserMI->getOperand(2).getImm(); 2140fca6ea1SDimitry Andric if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm)) 2150fca6ea1SDimitry Andric break; 2160fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 2170fca6ea1SDimitry Andric break; 2180fca6ea1SDimitry Andric } 2190fca6ea1SDimitry 
Andric 2200fca6ea1SDimitry Andric case LoongArch::SLL_D: 2210fca6ea1SDimitry Andric // Operand 2 is the shift amount which uses log2(grlen) bits. 2220fca6ea1SDimitry Andric if (OpIdx == 2) { 2230fca6ea1SDimitry Andric if (Bits >= Log2_32(ST.getGRLen())) 2240fca6ea1SDimitry Andric break; 2250fca6ea1SDimitry Andric return false; 2260fca6ea1SDimitry Andric } 2270fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 2280fca6ea1SDimitry Andric break; 2290fca6ea1SDimitry Andric 2300fca6ea1SDimitry Andric case LoongArch::SRA_D: 2310fca6ea1SDimitry Andric case LoongArch::SRL_D: 2320fca6ea1SDimitry Andric case LoongArch::ROTR_D: 2330fca6ea1SDimitry Andric // Operand 2 is the shift amount which uses 6 bits. 2340fca6ea1SDimitry Andric if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen())) 2350fca6ea1SDimitry Andric break; 2360fca6ea1SDimitry Andric return false; 2370fca6ea1SDimitry Andric 2380fca6ea1SDimitry Andric case LoongArch::ST_B: 2390fca6ea1SDimitry Andric case LoongArch::STX_B: 2400fca6ea1SDimitry Andric case LoongArch::STGT_B: 2410fca6ea1SDimitry Andric case LoongArch::STLE_B: 2420fca6ea1SDimitry Andric case LoongArch::IOCSRWR_B: 2430fca6ea1SDimitry Andric // The first argument is the value to store. 2440fca6ea1SDimitry Andric if (OpIdx == 0 && Bits >= 8) 2450fca6ea1SDimitry Andric break; 2460fca6ea1SDimitry Andric return false; 2470fca6ea1SDimitry Andric case LoongArch::ST_H: 2480fca6ea1SDimitry Andric case LoongArch::STX_H: 2490fca6ea1SDimitry Andric case LoongArch::STGT_H: 2500fca6ea1SDimitry Andric case LoongArch::STLE_H: 2510fca6ea1SDimitry Andric case LoongArch::IOCSRWR_H: 2520fca6ea1SDimitry Andric // The first argument is the value to store. 
2530fca6ea1SDimitry Andric if (OpIdx == 0 && Bits >= 16) 2540fca6ea1SDimitry Andric break; 2550fca6ea1SDimitry Andric return false; 2560fca6ea1SDimitry Andric case LoongArch::ST_W: 2570fca6ea1SDimitry Andric case LoongArch::STX_W: 2580fca6ea1SDimitry Andric case LoongArch::SCREL_W: 2590fca6ea1SDimitry Andric case LoongArch::STPTR_W: 2600fca6ea1SDimitry Andric case LoongArch::STGT_W: 2610fca6ea1SDimitry Andric case LoongArch::STLE_W: 2620fca6ea1SDimitry Andric case LoongArch::IOCSRWR_W: 2630fca6ea1SDimitry Andric // The first argument is the value to store. 2640fca6ea1SDimitry Andric if (OpIdx == 0 && Bits >= 32) 2650fca6ea1SDimitry Andric break; 2660fca6ea1SDimitry Andric return false; 2670fca6ea1SDimitry Andric 2680fca6ea1SDimitry Andric case LoongArch::CRC_W_B_W: 2690fca6ea1SDimitry Andric case LoongArch::CRCC_W_B_W: 2700fca6ea1SDimitry Andric if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32)) 2710fca6ea1SDimitry Andric break; 2720fca6ea1SDimitry Andric return false; 2730fca6ea1SDimitry Andric case LoongArch::CRC_W_H_W: 2740fca6ea1SDimitry Andric case LoongArch::CRCC_W_H_W: 2750fca6ea1SDimitry Andric if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32)) 2760fca6ea1SDimitry Andric break; 2770fca6ea1SDimitry Andric return false; 2780fca6ea1SDimitry Andric case LoongArch::CRC_W_D_W: 2790fca6ea1SDimitry Andric case LoongArch::CRCC_W_D_W: 2800fca6ea1SDimitry Andric if (OpIdx == 2 && Bits >= 32) 2810fca6ea1SDimitry Andric break; 2820fca6ea1SDimitry Andric return false; 2830fca6ea1SDimitry Andric 2840fca6ea1SDimitry Andric // For these, lower word of output in these operations, depends only on 2850fca6ea1SDimitry Andric // the lower word of input. So, we check all uses only read lower word. 
2860fca6ea1SDimitry Andric case LoongArch::COPY: 2870fca6ea1SDimitry Andric case LoongArch::PHI: 2880fca6ea1SDimitry Andric case LoongArch::ADD_D: 2890fca6ea1SDimitry Andric case LoongArch::ADDI_D: 2900fca6ea1SDimitry Andric case LoongArch::SUB_D: 2910fca6ea1SDimitry Andric case LoongArch::MUL_D: 2920fca6ea1SDimitry Andric case LoongArch::AND: 2930fca6ea1SDimitry Andric case LoongArch::OR: 2940fca6ea1SDimitry Andric case LoongArch::NOR: 2950fca6ea1SDimitry Andric case LoongArch::XOR: 2960fca6ea1SDimitry Andric case LoongArch::XORI: 2970fca6ea1SDimitry Andric case LoongArch::ANDN: 2980fca6ea1SDimitry Andric case LoongArch::ORN: 2990fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 3000fca6ea1SDimitry Andric break; 3010fca6ea1SDimitry Andric 3020fca6ea1SDimitry Andric case LoongArch::MASKNEZ: 3030fca6ea1SDimitry Andric case LoongArch::MASKEQZ: 3040fca6ea1SDimitry Andric if (OpIdx != 1) 3050fca6ea1SDimitry Andric return false; 3060fca6ea1SDimitry Andric Worklist.push_back(std::make_pair(UserMI, Bits)); 3070fca6ea1SDimitry Andric break; 3080fca6ea1SDimitry Andric } 3090fca6ea1SDimitry Andric } 3100fca6ea1SDimitry Andric } 3110fca6ea1SDimitry Andric 3120fca6ea1SDimitry Andric return true; 3130fca6ea1SDimitry Andric } 3140fca6ea1SDimitry Andric 3150fca6ea1SDimitry Andric static bool hasAllWUsers(const MachineInstr &OrigMI, 3160fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 3170fca6ea1SDimitry Andric const MachineRegisterInfo &MRI) { 3180fca6ea1SDimitry Andric return hasAllNBitUsers(OrigMI, ST, MRI, 32); 3190fca6ea1SDimitry Andric } 3200fca6ea1SDimitry Andric 3210fca6ea1SDimitry Andric // This function returns true if the machine instruction always outputs a value 3220fca6ea1SDimitry Andric // where bits 63:32 match bit 31. 
3230fca6ea1SDimitry Andric static bool isSignExtendingOpW(const MachineInstr &MI, 3240fca6ea1SDimitry Andric const MachineRegisterInfo &MRI, unsigned OpNo) { 3250fca6ea1SDimitry Andric switch (MI.getOpcode()) { 3260fca6ea1SDimitry Andric // Normal cases 3270fca6ea1SDimitry Andric case LoongArch::ADD_W: 3280fca6ea1SDimitry Andric case LoongArch::SUB_W: 3290fca6ea1SDimitry Andric case LoongArch::ADDI_W: 3300fca6ea1SDimitry Andric case LoongArch::ALSL_W: 3310fca6ea1SDimitry Andric case LoongArch::LU12I_W: 3320fca6ea1SDimitry Andric case LoongArch::SLT: 3330fca6ea1SDimitry Andric case LoongArch::SLTU: 3340fca6ea1SDimitry Andric case LoongArch::SLTI: 3350fca6ea1SDimitry Andric case LoongArch::SLTUI: 3360fca6ea1SDimitry Andric case LoongArch::ANDI: 3370fca6ea1SDimitry Andric case LoongArch::MUL_W: 3380fca6ea1SDimitry Andric case LoongArch::MULH_W: 3390fca6ea1SDimitry Andric case LoongArch::MULH_WU: 3400fca6ea1SDimitry Andric case LoongArch::DIV_W: 3410fca6ea1SDimitry Andric case LoongArch::MOD_W: 3420fca6ea1SDimitry Andric case LoongArch::DIV_WU: 3430fca6ea1SDimitry Andric case LoongArch::MOD_WU: 3440fca6ea1SDimitry Andric case LoongArch::SLL_W: 3450fca6ea1SDimitry Andric case LoongArch::SRL_W: 3460fca6ea1SDimitry Andric case LoongArch::SRA_W: 3470fca6ea1SDimitry Andric case LoongArch::ROTR_W: 3480fca6ea1SDimitry Andric case LoongArch::SLLI_W: 3490fca6ea1SDimitry Andric case LoongArch::SRLI_W: 3500fca6ea1SDimitry Andric case LoongArch::SRAI_W: 3510fca6ea1SDimitry Andric case LoongArch::ROTRI_W: 3520fca6ea1SDimitry Andric case LoongArch::EXT_W_B: 3530fca6ea1SDimitry Andric case LoongArch::EXT_W_H: 3540fca6ea1SDimitry Andric case LoongArch::CLO_W: 3550fca6ea1SDimitry Andric case LoongArch::CLZ_W: 3560fca6ea1SDimitry Andric case LoongArch::CTO_W: 3570fca6ea1SDimitry Andric case LoongArch::CTZ_W: 3580fca6ea1SDimitry Andric case LoongArch::BYTEPICK_W: 3590fca6ea1SDimitry Andric case LoongArch::REVB_2H: 3600fca6ea1SDimitry Andric case LoongArch::BITREV_4B: 3610fca6ea1SDimitry 
Andric case LoongArch::BITREV_W: 3620fca6ea1SDimitry Andric case LoongArch::BSTRINS_W: 3630fca6ea1SDimitry Andric case LoongArch::BSTRPICK_W: 3640fca6ea1SDimitry Andric case LoongArch::LD_B: 3650fca6ea1SDimitry Andric case LoongArch::LD_H: 3660fca6ea1SDimitry Andric case LoongArch::LD_W: 3670fca6ea1SDimitry Andric case LoongArch::LD_BU: 3680fca6ea1SDimitry Andric case LoongArch::LD_HU: 3690fca6ea1SDimitry Andric case LoongArch::LL_W: 3700fca6ea1SDimitry Andric case LoongArch::LLACQ_W: 3710fca6ea1SDimitry Andric case LoongArch::RDTIMEL_W: 3720fca6ea1SDimitry Andric case LoongArch::RDTIMEH_W: 3730fca6ea1SDimitry Andric case LoongArch::CPUCFG: 3740fca6ea1SDimitry Andric case LoongArch::LDX_B: 3750fca6ea1SDimitry Andric case LoongArch::LDX_H: 3760fca6ea1SDimitry Andric case LoongArch::LDX_W: 3770fca6ea1SDimitry Andric case LoongArch::LDX_BU: 3780fca6ea1SDimitry Andric case LoongArch::LDX_HU: 3790fca6ea1SDimitry Andric case LoongArch::LDPTR_W: 3800fca6ea1SDimitry Andric case LoongArch::LDGT_B: 3810fca6ea1SDimitry Andric case LoongArch::LDGT_H: 3820fca6ea1SDimitry Andric case LoongArch::LDGT_W: 3830fca6ea1SDimitry Andric case LoongArch::LDLE_B: 3840fca6ea1SDimitry Andric case LoongArch::LDLE_H: 3850fca6ea1SDimitry Andric case LoongArch::LDLE_W: 3860fca6ea1SDimitry Andric case LoongArch::AMSWAP_B: 3870fca6ea1SDimitry Andric case LoongArch::AMSWAP_H: 3880fca6ea1SDimitry Andric case LoongArch::AMSWAP_W: 3890fca6ea1SDimitry Andric case LoongArch::AMADD_B: 3900fca6ea1SDimitry Andric case LoongArch::AMADD_H: 3910fca6ea1SDimitry Andric case LoongArch::AMADD_W: 3920fca6ea1SDimitry Andric case LoongArch::AMAND_W: 3930fca6ea1SDimitry Andric case LoongArch::AMOR_W: 3940fca6ea1SDimitry Andric case LoongArch::AMXOR_W: 3950fca6ea1SDimitry Andric case LoongArch::AMMAX_W: 3960fca6ea1SDimitry Andric case LoongArch::AMMIN_W: 3970fca6ea1SDimitry Andric case LoongArch::AMMAX_WU: 3980fca6ea1SDimitry Andric case LoongArch::AMMIN_WU: 3990fca6ea1SDimitry Andric case LoongArch::AMSWAP__DB_B: 
4000fca6ea1SDimitry Andric case LoongArch::AMSWAP__DB_H: 4010fca6ea1SDimitry Andric case LoongArch::AMSWAP__DB_W: 4020fca6ea1SDimitry Andric case LoongArch::AMADD__DB_B: 4030fca6ea1SDimitry Andric case LoongArch::AMADD__DB_H: 4040fca6ea1SDimitry Andric case LoongArch::AMADD__DB_W: 4050fca6ea1SDimitry Andric case LoongArch::AMAND__DB_W: 4060fca6ea1SDimitry Andric case LoongArch::AMOR__DB_W: 4070fca6ea1SDimitry Andric case LoongArch::AMXOR__DB_W: 4080fca6ea1SDimitry Andric case LoongArch::AMMAX__DB_W: 4090fca6ea1SDimitry Andric case LoongArch::AMMIN__DB_W: 4100fca6ea1SDimitry Andric case LoongArch::AMMAX__DB_WU: 4110fca6ea1SDimitry Andric case LoongArch::AMMIN__DB_WU: 4120fca6ea1SDimitry Andric case LoongArch::AMCAS_B: 4130fca6ea1SDimitry Andric case LoongArch::AMCAS_H: 4140fca6ea1SDimitry Andric case LoongArch::AMCAS_W: 4150fca6ea1SDimitry Andric case LoongArch::AMCAS__DB_B: 4160fca6ea1SDimitry Andric case LoongArch::AMCAS__DB_H: 4170fca6ea1SDimitry Andric case LoongArch::AMCAS__DB_W: 4180fca6ea1SDimitry Andric case LoongArch::CRC_W_B_W: 4190fca6ea1SDimitry Andric case LoongArch::CRC_W_H_W: 4200fca6ea1SDimitry Andric case LoongArch::CRC_W_W_W: 4210fca6ea1SDimitry Andric case LoongArch::CRC_W_D_W: 4220fca6ea1SDimitry Andric case LoongArch::CRCC_W_B_W: 4230fca6ea1SDimitry Andric case LoongArch::CRCC_W_H_W: 4240fca6ea1SDimitry Andric case LoongArch::CRCC_W_W_W: 4250fca6ea1SDimitry Andric case LoongArch::CRCC_W_D_W: 4260fca6ea1SDimitry Andric case LoongArch::IOCSRRD_B: 4270fca6ea1SDimitry Andric case LoongArch::IOCSRRD_H: 4280fca6ea1SDimitry Andric case LoongArch::IOCSRRD_W: 4290fca6ea1SDimitry Andric case LoongArch::MOVFR2GR_S: 4300fca6ea1SDimitry Andric case LoongArch::MOVFCSR2GR: 4310fca6ea1SDimitry Andric case LoongArch::MOVCF2GR: 4320fca6ea1SDimitry Andric case LoongArch::MOVFRH2GR_S: 4330fca6ea1SDimitry Andric case LoongArch::MOVFR2GR_S_64: 4340fca6ea1SDimitry Andric // TODO: Add vector 4350fca6ea1SDimitry Andric return true; 4360fca6ea1SDimitry Andric // Special 
cases that require checking operands. 4370fca6ea1SDimitry Andric // shifting right sufficiently makes the value 32-bit sign-extended 4380fca6ea1SDimitry Andric case LoongArch::SRAI_D: 4390fca6ea1SDimitry Andric return MI.getOperand(2).getImm() >= 32; 4400fca6ea1SDimitry Andric case LoongArch::SRLI_D: 4410fca6ea1SDimitry Andric return MI.getOperand(2).getImm() > 32; 4420fca6ea1SDimitry Andric // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended. 4430fca6ea1SDimitry Andric case LoongArch::ADDI_D: 4440fca6ea1SDimitry Andric case LoongArch::ORI: 4450fca6ea1SDimitry Andric return MI.getOperand(1).isReg() && 4460fca6ea1SDimitry Andric MI.getOperand(1).getReg() == LoongArch::R0; 4470fca6ea1SDimitry Andric // A bits extract is sign extended if the msb is less than 31. 4480fca6ea1SDimitry Andric case LoongArch::BSTRPICK_D: 4490fca6ea1SDimitry Andric return MI.getOperand(2).getImm() < 31; 4500fca6ea1SDimitry Andric // Copying from R0 produces zero. 4510fca6ea1SDimitry Andric case LoongArch::COPY: 4520fca6ea1SDimitry Andric return MI.getOperand(1).getReg() == LoongArch::R0; 4530fca6ea1SDimitry Andric // Ignore the scratch register destination. 
4540fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicSwap32: 4550fca6ea1SDimitry Andric case LoongArch::PseudoAtomicSwap32: 4560fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadAdd32: 4570fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadSub32: 4580fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadNand32: 4590fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadNand32: 4600fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadAdd32: 4610fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadSub32: 4620fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadAnd32: 4630fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadOr32: 4640fca6ea1SDimitry Andric case LoongArch::PseudoAtomicLoadXor32: 4650fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadUMax32: 4660fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadUMin32: 4670fca6ea1SDimitry Andric case LoongArch::PseudoCmpXchg32: 4680fca6ea1SDimitry Andric case LoongArch::PseudoMaskedCmpXchg32: 4690fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadMax32: 4700fca6ea1SDimitry Andric case LoongArch::PseudoMaskedAtomicLoadMin32: 4710fca6ea1SDimitry Andric return OpNo == 0; 4720fca6ea1SDimitry Andric } 4730fca6ea1SDimitry Andric 4740fca6ea1SDimitry Andric return false; 4750fca6ea1SDimitry Andric } 4760fca6ea1SDimitry Andric 4770fca6ea1SDimitry Andric static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, 4780fca6ea1SDimitry Andric const MachineRegisterInfo &MRI, 4790fca6ea1SDimitry Andric SmallPtrSetImpl<MachineInstr *> &FixableDef) { 4800fca6ea1SDimitry Andric SmallSet<Register, 4> Visited; 4810fca6ea1SDimitry Andric SmallVector<Register, 4> Worklist; 4820fca6ea1SDimitry Andric 4830fca6ea1SDimitry Andric auto AddRegToWorkList = [&](Register SrcReg) { 4840fca6ea1SDimitry Andric if (!SrcReg.isVirtual()) 4850fca6ea1SDimitry Andric return false; 4860fca6ea1SDimitry Andric Worklist.push_back(SrcReg); 4870fca6ea1SDimitry Andric return 
true; 4880fca6ea1SDimitry Andric }; 4890fca6ea1SDimitry Andric 4900fca6ea1SDimitry Andric if (!AddRegToWorkList(SrcReg)) 4910fca6ea1SDimitry Andric return false; 4920fca6ea1SDimitry Andric 4930fca6ea1SDimitry Andric while (!Worklist.empty()) { 4940fca6ea1SDimitry Andric Register Reg = Worklist.pop_back_val(); 4950fca6ea1SDimitry Andric 4960fca6ea1SDimitry Andric // If we already visited this register, we don't need to check it again. 4970fca6ea1SDimitry Andric if (!Visited.insert(Reg).second) 4980fca6ea1SDimitry Andric continue; 4990fca6ea1SDimitry Andric 5000fca6ea1SDimitry Andric MachineInstr *MI = MRI.getVRegDef(Reg); 5010fca6ea1SDimitry Andric if (!MI) 5020fca6ea1SDimitry Andric continue; 5030fca6ea1SDimitry Andric 5040fca6ea1SDimitry Andric int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr); 5050fca6ea1SDimitry Andric assert(OpNo != -1 && "Couldn't find register"); 5060fca6ea1SDimitry Andric 5070fca6ea1SDimitry Andric // If this is a sign extending operation we don't need to look any further. 5080fca6ea1SDimitry Andric if (isSignExtendingOpW(*MI, MRI, OpNo)) 5090fca6ea1SDimitry Andric continue; 5100fca6ea1SDimitry Andric 5110fca6ea1SDimitry Andric // Is this an instruction that propagates sign extend? 5120fca6ea1SDimitry Andric switch (MI->getOpcode()) { 5130fca6ea1SDimitry Andric default: 5140fca6ea1SDimitry Andric // Unknown opcode, give up. 5150fca6ea1SDimitry Andric return false; 5160fca6ea1SDimitry Andric case LoongArch::COPY: { 5170fca6ea1SDimitry Andric const MachineFunction *MF = MI->getMF(); 5180fca6ea1SDimitry Andric const LoongArchMachineFunctionInfo *LAFI = 5190fca6ea1SDimitry Andric MF->getInfo<LoongArchMachineFunctionInfo>(); 5200fca6ea1SDimitry Andric 5210fca6ea1SDimitry Andric // If this is the entry block and the register is livein, see if we know 5220fca6ea1SDimitry Andric // it is sign extended. 
5230fca6ea1SDimitry Andric if (MI->getParent() == &MF->front()) { 5240fca6ea1SDimitry Andric Register VReg = MI->getOperand(0).getReg(); 5250fca6ea1SDimitry Andric if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg)) 5260fca6ea1SDimitry Andric continue; 5270fca6ea1SDimitry Andric } 5280fca6ea1SDimitry Andric 5290fca6ea1SDimitry Andric Register CopySrcReg = MI->getOperand(1).getReg(); 5300fca6ea1SDimitry Andric if (CopySrcReg == LoongArch::R4) { 5310fca6ea1SDimitry Andric // For a method return value, we check the ZExt/SExt flags in attribute. 5320fca6ea1SDimitry Andric // We assume the following code sequence for method call. 5330fca6ea1SDimitry Andric // PseudoCALL @bar, ... 5340fca6ea1SDimitry Andric // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 5350fca6ea1SDimitry Andric // %0:gpr = COPY $r4 5360fca6ea1SDimitry Andric // 5370fca6ea1SDimitry Andric // We use the PseudoCall to look up the IR function being called to find 5380fca6ea1SDimitry Andric // its return attributes. 
5390fca6ea1SDimitry Andric const MachineBasicBlock *MBB = MI->getParent(); 5400fca6ea1SDimitry Andric auto II = MI->getIterator(); 5410fca6ea1SDimitry Andric if (II == MBB->instr_begin() || 5420fca6ea1SDimitry Andric (--II)->getOpcode() != LoongArch::ADJCALLSTACKUP) 5430fca6ea1SDimitry Andric return false; 5440fca6ea1SDimitry Andric 5450fca6ea1SDimitry Andric const MachineInstr &CallMI = *(--II); 5460fca6ea1SDimitry Andric if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal()) 5470fca6ea1SDimitry Andric return false; 5480fca6ea1SDimitry Andric 5490fca6ea1SDimitry Andric auto *CalleeFn = 5500fca6ea1SDimitry Andric dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal()); 5510fca6ea1SDimitry Andric if (!CalleeFn) 5520fca6ea1SDimitry Andric return false; 5530fca6ea1SDimitry Andric 5540fca6ea1SDimitry Andric auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType()); 5550fca6ea1SDimitry Andric if (!IntTy) 5560fca6ea1SDimitry Andric return false; 5570fca6ea1SDimitry Andric 5580fca6ea1SDimitry Andric const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs(); 5590fca6ea1SDimitry Andric unsigned BitWidth = IntTy->getBitWidth(); 5600fca6ea1SDimitry Andric if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) || 5610fca6ea1SDimitry Andric (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt))) 5620fca6ea1SDimitry Andric continue; 5630fca6ea1SDimitry Andric } 5640fca6ea1SDimitry Andric 5650fca6ea1SDimitry Andric if (!AddRegToWorkList(CopySrcReg)) 5660fca6ea1SDimitry Andric return false; 5670fca6ea1SDimitry Andric 5680fca6ea1SDimitry Andric break; 5690fca6ea1SDimitry Andric } 5700fca6ea1SDimitry Andric 5710fca6ea1SDimitry Andric // For these, we just need to check if the 1st operand is sign extended. 
5720fca6ea1SDimitry Andric case LoongArch::MOD_D: 5730fca6ea1SDimitry Andric case LoongArch::ANDI: 5740fca6ea1SDimitry Andric case LoongArch::ORI: 5750fca6ea1SDimitry Andric case LoongArch::XORI: 5760fca6ea1SDimitry Andric // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R. 5770fca6ea1SDimitry Andric // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1 5780fca6ea1SDimitry Andric // Logical operations use a sign extended 12-bit immediate. 5790fca6ea1SDimitry Andric if (!AddRegToWorkList(MI->getOperand(1).getReg())) 5800fca6ea1SDimitry Andric return false; 5810fca6ea1SDimitry Andric 5820fca6ea1SDimitry Andric break; 5830fca6ea1SDimitry Andric case LoongArch::MOD_DU: 5840fca6ea1SDimitry Andric case LoongArch::AND: 5850fca6ea1SDimitry Andric case LoongArch::OR: 5860fca6ea1SDimitry Andric case LoongArch::XOR: 5870fca6ea1SDimitry Andric case LoongArch::ANDN: 5880fca6ea1SDimitry Andric case LoongArch::ORN: 5890fca6ea1SDimitry Andric case LoongArch::PHI: { 5900fca6ea1SDimitry Andric // If all incoming values are sign-extended, the output of AND, OR, XOR, 5910fca6ea1SDimitry Andric // or PHI is also sign-extended. 5920fca6ea1SDimitry Andric 5930fca6ea1SDimitry Andric // The input registers for PHI are operand 1, 3, ... 5940fca6ea1SDimitry Andric // The input registers for others are operand 1 and 2. 
5950fca6ea1SDimitry Andric unsigned B = 1, E = 3, D = 1; 5960fca6ea1SDimitry Andric switch (MI->getOpcode()) { 5970fca6ea1SDimitry Andric case LoongArch::PHI: 5980fca6ea1SDimitry Andric E = MI->getNumOperands(); 5990fca6ea1SDimitry Andric D = 2; 6000fca6ea1SDimitry Andric break; 6010fca6ea1SDimitry Andric } 6020fca6ea1SDimitry Andric 6030fca6ea1SDimitry Andric for (unsigned I = B; I != E; I += D) { 6040fca6ea1SDimitry Andric if (!MI->getOperand(I).isReg()) 6050fca6ea1SDimitry Andric return false; 6060fca6ea1SDimitry Andric 6070fca6ea1SDimitry Andric if (!AddRegToWorkList(MI->getOperand(I).getReg())) 6080fca6ea1SDimitry Andric return false; 6090fca6ea1SDimitry Andric } 6100fca6ea1SDimitry Andric 6110fca6ea1SDimitry Andric break; 6120fca6ea1SDimitry Andric } 6130fca6ea1SDimitry Andric 6140fca6ea1SDimitry Andric case LoongArch::MASKEQZ: 6150fca6ea1SDimitry Andric case LoongArch::MASKNEZ: 6160fca6ea1SDimitry Andric // Instructions return zero or operand 1. Result is sign extended if 6170fca6ea1SDimitry Andric // operand 1 is sign extended. 
6180fca6ea1SDimitry Andric if (!AddRegToWorkList(MI->getOperand(1).getReg())) 6190fca6ea1SDimitry Andric return false; 6200fca6ea1SDimitry Andric break; 6210fca6ea1SDimitry Andric 6220fca6ea1SDimitry Andric // With these opcode, we can "fix" them with the W-version 6230fca6ea1SDimitry Andric // if we know all users of the result only rely on bits 31:0 6240fca6ea1SDimitry Andric case LoongArch::SLLI_D: 6250fca6ea1SDimitry Andric // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits 6260fca6ea1SDimitry Andric if (MI->getOperand(2).getImm() >= 32) 6270fca6ea1SDimitry Andric return false; 6280fca6ea1SDimitry Andric [[fallthrough]]; 6290fca6ea1SDimitry Andric case LoongArch::ADDI_D: 6300fca6ea1SDimitry Andric case LoongArch::ADD_D: 6310fca6ea1SDimitry Andric case LoongArch::LD_D: 6320fca6ea1SDimitry Andric case LoongArch::LD_WU: 6330fca6ea1SDimitry Andric case LoongArch::MUL_D: 6340fca6ea1SDimitry Andric case LoongArch::SUB_D: 6350fca6ea1SDimitry Andric if (hasAllWUsers(*MI, ST, MRI)) { 6360fca6ea1SDimitry Andric FixableDef.insert(MI); 6370fca6ea1SDimitry Andric break; 6380fca6ea1SDimitry Andric } 6390fca6ea1SDimitry Andric return false; 640*6e516c87SDimitry Andric // If all incoming values are sign-extended and all users only use 641*6e516c87SDimitry Andric // the lower 32 bits, then convert them to W versions. 
642*6e516c87SDimitry Andric case LoongArch::DIV_D: { 643*6e516c87SDimitry Andric if (!AddRegToWorkList(MI->getOperand(1).getReg())) 644*6e516c87SDimitry Andric return false; 645*6e516c87SDimitry Andric if (!AddRegToWorkList(MI->getOperand(2).getReg())) 646*6e516c87SDimitry Andric return false; 647*6e516c87SDimitry Andric if (hasAllWUsers(*MI, ST, MRI)) { 648*6e516c87SDimitry Andric FixableDef.insert(MI); 649*6e516c87SDimitry Andric break; 650*6e516c87SDimitry Andric } 651*6e516c87SDimitry Andric return false; 652*6e516c87SDimitry Andric } 6530fca6ea1SDimitry Andric } 6540fca6ea1SDimitry Andric } 6550fca6ea1SDimitry Andric 6560fca6ea1SDimitry Andric // If we get here, then every node we visited produces a sign extended value 6570fca6ea1SDimitry Andric // or propagated sign extended values. So the result must be sign extended. 6580fca6ea1SDimitry Andric return true; 6590fca6ea1SDimitry Andric } 6600fca6ea1SDimitry Andric 6610fca6ea1SDimitry Andric static unsigned getWOp(unsigned Opcode) { 6620fca6ea1SDimitry Andric switch (Opcode) { 6630fca6ea1SDimitry Andric case LoongArch::ADDI_D: 6640fca6ea1SDimitry Andric return LoongArch::ADDI_W; 6650fca6ea1SDimitry Andric case LoongArch::ADD_D: 6660fca6ea1SDimitry Andric return LoongArch::ADD_W; 667*6e516c87SDimitry Andric case LoongArch::DIV_D: 668*6e516c87SDimitry Andric return LoongArch::DIV_W; 6690fca6ea1SDimitry Andric case LoongArch::LD_D: 6700fca6ea1SDimitry Andric case LoongArch::LD_WU: 6710fca6ea1SDimitry Andric return LoongArch::LD_W; 6720fca6ea1SDimitry Andric case LoongArch::MUL_D: 6730fca6ea1SDimitry Andric return LoongArch::MUL_W; 6740fca6ea1SDimitry Andric case LoongArch::SLLI_D: 6750fca6ea1SDimitry Andric return LoongArch::SLLI_W; 6760fca6ea1SDimitry Andric case LoongArch::SUB_D: 6770fca6ea1SDimitry Andric return LoongArch::SUB_W; 6780fca6ea1SDimitry Andric default: 6790fca6ea1SDimitry Andric llvm_unreachable("Unexpected opcode for replacement with W variant"); 6800fca6ea1SDimitry Andric } 6810fca6ea1SDimitry 
Andric } 6820fca6ea1SDimitry Andric 6830fca6ea1SDimitry Andric bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF, 6840fca6ea1SDimitry Andric const LoongArchInstrInfo &TII, 6850fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 6860fca6ea1SDimitry Andric MachineRegisterInfo &MRI) { 6870fca6ea1SDimitry Andric if (DisableSExtWRemoval) 6880fca6ea1SDimitry Andric return false; 6890fca6ea1SDimitry Andric 6900fca6ea1SDimitry Andric bool MadeChange = false; 6910fca6ea1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 6920fca6ea1SDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 6930fca6ea1SDimitry Andric // We're looking for the sext.w pattern ADDI.W rd, rs, 0. 6940fca6ea1SDimitry Andric if (!LoongArch::isSEXT_W(MI)) 6950fca6ea1SDimitry Andric continue; 6960fca6ea1SDimitry Andric 6970fca6ea1SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 6980fca6ea1SDimitry Andric 6990fca6ea1SDimitry Andric SmallPtrSet<MachineInstr *, 4> FixableDefs; 7000fca6ea1SDimitry Andric 7010fca6ea1SDimitry Andric // If all users only use the lower bits, this sext.w is redundant. 7020fca6ea1SDimitry Andric // Or if all definitions reaching MI sign-extend their output, 7030fca6ea1SDimitry Andric // then sext.w is redundant. 7040fca6ea1SDimitry Andric if (!hasAllWUsers(MI, ST, MRI) && 7050fca6ea1SDimitry Andric !isSignExtendedW(SrcReg, ST, MRI, FixableDefs)) 7060fca6ea1SDimitry Andric continue; 7070fca6ea1SDimitry Andric 7080fca6ea1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 7090fca6ea1SDimitry Andric if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg))) 7100fca6ea1SDimitry Andric continue; 7110fca6ea1SDimitry Andric 7120fca6ea1SDimitry Andric // Convert Fixable instructions to their W versions. 
7130fca6ea1SDimitry Andric for (MachineInstr *Fixable : FixableDefs) { 7140fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing " << *Fixable); 7150fca6ea1SDimitry Andric Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode()))); 7160fca6ea1SDimitry Andric Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap); 7170fca6ea1SDimitry Andric Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap); 7180fca6ea1SDimitry Andric Fixable->clearFlag(MachineInstr::MIFlag::IsExact); 7190fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " with " << *Fixable); 7200fca6ea1SDimitry Andric ++NumTransformedToWInstrs; 7210fca6ea1SDimitry Andric } 7220fca6ea1SDimitry Andric 7230fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); 7240fca6ea1SDimitry Andric MRI.replaceRegWith(DstReg, SrcReg); 7250fca6ea1SDimitry Andric MRI.clearKillFlags(SrcReg); 7260fca6ea1SDimitry Andric MI.eraseFromParent(); 7270fca6ea1SDimitry Andric ++NumRemovedSExtW; 7280fca6ea1SDimitry Andric MadeChange = true; 7290fca6ea1SDimitry Andric } 7300fca6ea1SDimitry Andric } 7310fca6ea1SDimitry Andric 7320fca6ea1SDimitry Andric return MadeChange; 7330fca6ea1SDimitry Andric } 7340fca6ea1SDimitry Andric 7350fca6ea1SDimitry Andric bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF, 7360fca6ea1SDimitry Andric const LoongArchInstrInfo &TII, 7370fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 7380fca6ea1SDimitry Andric MachineRegisterInfo &MRI) { 7390fca6ea1SDimitry Andric bool MadeChange = false; 7400fca6ea1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 7410fca6ea1SDimitry Andric for (MachineInstr &MI : MBB) { 7420fca6ea1SDimitry Andric unsigned Opc; 7430fca6ea1SDimitry Andric switch (MI.getOpcode()) { 7440fca6ea1SDimitry Andric default: 7450fca6ea1SDimitry Andric continue; 7460fca6ea1SDimitry Andric case LoongArch::ADDI_W: 7470fca6ea1SDimitry Andric Opc = LoongArch::ADDI_D; 7480fca6ea1SDimitry Andric break; 7490fca6ea1SDimitry Andric } 7500fca6ea1SDimitry Andric 7510fca6ea1SDimitry 
Andric if (hasAllWUsers(MI, ST, MRI)) { 7520fca6ea1SDimitry Andric MI.setDesc(TII.get(Opc)); 7530fca6ea1SDimitry Andric MadeChange = true; 7540fca6ea1SDimitry Andric } 7550fca6ea1SDimitry Andric } 7560fca6ea1SDimitry Andric } 7570fca6ea1SDimitry Andric 7580fca6ea1SDimitry Andric return MadeChange; 7590fca6ea1SDimitry Andric } 7600fca6ea1SDimitry Andric 7610fca6ea1SDimitry Andric bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF, 7620fca6ea1SDimitry Andric const LoongArchInstrInfo &TII, 7630fca6ea1SDimitry Andric const LoongArchSubtarget &ST, 7640fca6ea1SDimitry Andric MachineRegisterInfo &MRI) { 7650fca6ea1SDimitry Andric bool MadeChange = false; 7660fca6ea1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 7670fca6ea1SDimitry Andric for (MachineInstr &MI : MBB) { 7680fca6ea1SDimitry Andric unsigned WOpc; 7690fca6ea1SDimitry Andric // TODO: Add more? 7700fca6ea1SDimitry Andric switch (MI.getOpcode()) { 7710fca6ea1SDimitry Andric default: 7720fca6ea1SDimitry Andric continue; 7730fca6ea1SDimitry Andric case LoongArch::ADD_D: 7740fca6ea1SDimitry Andric WOpc = LoongArch::ADD_W; 7750fca6ea1SDimitry Andric break; 7760fca6ea1SDimitry Andric case LoongArch::ADDI_D: 7770fca6ea1SDimitry Andric WOpc = LoongArch::ADDI_W; 7780fca6ea1SDimitry Andric break; 7790fca6ea1SDimitry Andric case LoongArch::SUB_D: 7800fca6ea1SDimitry Andric WOpc = LoongArch::SUB_W; 7810fca6ea1SDimitry Andric break; 7820fca6ea1SDimitry Andric case LoongArch::MUL_D: 7830fca6ea1SDimitry Andric WOpc = LoongArch::MUL_W; 7840fca6ea1SDimitry Andric break; 7850fca6ea1SDimitry Andric case LoongArch::SLLI_D: 7860fca6ea1SDimitry Andric // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits 7870fca6ea1SDimitry Andric if (MI.getOperand(2).getImm() >= 32) 7880fca6ea1SDimitry Andric continue; 7890fca6ea1SDimitry Andric WOpc = LoongArch::SLLI_W; 7900fca6ea1SDimitry Andric break; 7910fca6ea1SDimitry Andric case LoongArch::LD_D: 7920fca6ea1SDimitry Andric case LoongArch::LD_WU: 
7930fca6ea1SDimitry Andric WOpc = LoongArch::LD_W; 7940fca6ea1SDimitry Andric break; 7950fca6ea1SDimitry Andric } 7960fca6ea1SDimitry Andric 7970fca6ea1SDimitry Andric if (hasAllWUsers(MI, ST, MRI)) { 7980fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing " << MI); 7990fca6ea1SDimitry Andric MI.setDesc(TII.get(WOpc)); 8000fca6ea1SDimitry Andric MI.clearFlag(MachineInstr::MIFlag::NoSWrap); 8010fca6ea1SDimitry Andric MI.clearFlag(MachineInstr::MIFlag::NoUWrap); 8020fca6ea1SDimitry Andric MI.clearFlag(MachineInstr::MIFlag::IsExact); 8030fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " with " << MI); 8040fca6ea1SDimitry Andric ++NumTransformedToWInstrs; 8050fca6ea1SDimitry Andric MadeChange = true; 8060fca6ea1SDimitry Andric } 8070fca6ea1SDimitry Andric } 8080fca6ea1SDimitry Andric } 8090fca6ea1SDimitry Andric 8100fca6ea1SDimitry Andric return MadeChange; 8110fca6ea1SDimitry Andric } 8120fca6ea1SDimitry Andric 8130fca6ea1SDimitry Andric bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) { 8140fca6ea1SDimitry Andric if (skipFunction(MF.getFunction())) 8150fca6ea1SDimitry Andric return false; 8160fca6ea1SDimitry Andric 8170fca6ea1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 8180fca6ea1SDimitry Andric const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>(); 8190fca6ea1SDimitry Andric const LoongArchInstrInfo &TII = *ST.getInstrInfo(); 8200fca6ea1SDimitry Andric 8210fca6ea1SDimitry Andric if (!ST.is64Bit()) 8220fca6ea1SDimitry Andric return false; 8230fca6ea1SDimitry Andric 8240fca6ea1SDimitry Andric bool MadeChange = false; 8250fca6ea1SDimitry Andric MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI); 8260fca6ea1SDimitry Andric 8270fca6ea1SDimitry Andric if (!(DisableCvtToDSuffix || ST.preferWInst())) 8280fca6ea1SDimitry Andric MadeChange |= convertToDSuffixes(MF, TII, ST, MRI); 8290fca6ea1SDimitry Andric 8300fca6ea1SDimitry Andric if (ST.preferWInst()) 8310fca6ea1SDimitry Andric MadeChange |= convertToWSuffixes(MF, TII, ST, 
MRI); 8320fca6ea1SDimitry Andric 8330fca6ea1SDimitry Andric return MadeChange; 8340fca6ea1SDimitry Andric } 835