xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp (revision 6e516c87b6d779911edde7481d8aef165b837a03)
//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass does some optimizations for *W instructions at the MI level.
//
// First it removes unneeded sext(addi.w rd, rs, 0) instructions. Either
// because the sign extended bits aren't consumed or because the input was
// already sign extended by an earlier instruction.
//
// Then:
// 1. Unless explicitly disabled or the target prefers instructions with W
//    suffix, it removes the -w suffix from opw instructions whenever all users
//    are dependent only on the lower word of the result of the instruction.
//    The cases handled are:
//    * addi.w because it helps reduce test differences between LA32 and LA64
//      w/o being a pessimization.
//
// 2. Or if explicitly enabled or the target prefers instructions with W
//    suffix, it adds the W suffix to the instruction whenever all users are
//    dependent only on the lower word of the result of the instruction.
//    The cases handled are:
//    * add.d/addi.d/sub.d/mul.d.
//    * slli.d with imm < 32.
//    * ld.d/ld.wu.
//===---------------------------------------------------------------------===//
310fca6ea1SDimitry Andric 
320fca6ea1SDimitry Andric #include "LoongArch.h"
330fca6ea1SDimitry Andric #include "LoongArchMachineFunctionInfo.h"
340fca6ea1SDimitry Andric #include "LoongArchSubtarget.h"
350fca6ea1SDimitry Andric #include "llvm/ADT/SmallSet.h"
360fca6ea1SDimitry Andric #include "llvm/ADT/Statistic.h"
370fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
380fca6ea1SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
390fca6ea1SDimitry Andric 
400fca6ea1SDimitry Andric using namespace llvm;
410fca6ea1SDimitry Andric 
420fca6ea1SDimitry Andric #define DEBUG_TYPE "loongarch-opt-w-instrs"
430fca6ea1SDimitry Andric #define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
440fca6ea1SDimitry Andric 
450fca6ea1SDimitry Andric STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
460fca6ea1SDimitry Andric STATISTIC(NumTransformedToWInstrs,
470fca6ea1SDimitry Andric           "Number of instructions transformed to W-ops");
480fca6ea1SDimitry Andric 
490fca6ea1SDimitry Andric static cl::opt<bool>
500fca6ea1SDimitry Andric     DisableSExtWRemoval("loongarch-disable-sextw-removal",
510fca6ea1SDimitry Andric                         cl::desc("Disable removal of sign-extend insn"),
520fca6ea1SDimitry Andric                         cl::init(false), cl::Hidden);
530fca6ea1SDimitry Andric static cl::opt<bool>
540fca6ea1SDimitry Andric     DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
550fca6ea1SDimitry Andric                         cl::desc("Disable convert to D suffix"),
560fca6ea1SDimitry Andric                         cl::init(false), cl::Hidden);
570fca6ea1SDimitry Andric 
580fca6ea1SDimitry Andric namespace {
590fca6ea1SDimitry Andric 
600fca6ea1SDimitry Andric class LoongArchOptWInstrs : public MachineFunctionPass {
610fca6ea1SDimitry Andric public:
620fca6ea1SDimitry Andric   static char ID;
630fca6ea1SDimitry Andric 
640fca6ea1SDimitry Andric   LoongArchOptWInstrs() : MachineFunctionPass(ID) {}
650fca6ea1SDimitry Andric 
660fca6ea1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
670fca6ea1SDimitry Andric   bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,
680fca6ea1SDimitry Andric                          const LoongArchSubtarget &ST,
690fca6ea1SDimitry Andric                          MachineRegisterInfo &MRI);
700fca6ea1SDimitry Andric   bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
710fca6ea1SDimitry Andric                           const LoongArchSubtarget &ST,
720fca6ea1SDimitry Andric                           MachineRegisterInfo &MRI);
730fca6ea1SDimitry Andric   bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
740fca6ea1SDimitry Andric                           const LoongArchSubtarget &ST,
750fca6ea1SDimitry Andric                           MachineRegisterInfo &MRI);
760fca6ea1SDimitry Andric 
770fca6ea1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
780fca6ea1SDimitry Andric     AU.setPreservesCFG();
790fca6ea1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
800fca6ea1SDimitry Andric   }
810fca6ea1SDimitry Andric 
820fca6ea1SDimitry Andric   StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }
830fca6ea1SDimitry Andric };
840fca6ea1SDimitry Andric 
850fca6ea1SDimitry Andric } // end anonymous namespace
860fca6ea1SDimitry Andric 
870fca6ea1SDimitry Andric char LoongArchOptWInstrs::ID = 0;
880fca6ea1SDimitry Andric INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME,
890fca6ea1SDimitry Andric                 false, false)
900fca6ea1SDimitry Andric 
910fca6ea1SDimitry Andric FunctionPass *llvm::createLoongArchOptWInstrsPass() {
920fca6ea1SDimitry Andric   return new LoongArchOptWInstrs();
930fca6ea1SDimitry Andric }
940fca6ea1SDimitry Andric 
950fca6ea1SDimitry Andric // Checks if all users only demand the lower \p OrigBits of the original
960fca6ea1SDimitry Andric // instruction's result.
970fca6ea1SDimitry Andric // TODO: handle multiple interdependent transformations
980fca6ea1SDimitry Andric static bool hasAllNBitUsers(const MachineInstr &OrigMI,
990fca6ea1SDimitry Andric                             const LoongArchSubtarget &ST,
1000fca6ea1SDimitry Andric                             const MachineRegisterInfo &MRI, unsigned OrigBits) {
1010fca6ea1SDimitry Andric 
1020fca6ea1SDimitry Andric   SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;
1030fca6ea1SDimitry Andric   SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;
1040fca6ea1SDimitry Andric 
1050fca6ea1SDimitry Andric   Worklist.push_back(std::make_pair(&OrigMI, OrigBits));
1060fca6ea1SDimitry Andric 
1070fca6ea1SDimitry Andric   while (!Worklist.empty()) {
1080fca6ea1SDimitry Andric     auto P = Worklist.pop_back_val();
1090fca6ea1SDimitry Andric     const MachineInstr *MI = P.first;
1100fca6ea1SDimitry Andric     unsigned Bits = P.second;
1110fca6ea1SDimitry Andric 
1120fca6ea1SDimitry Andric     if (!Visited.insert(P).second)
1130fca6ea1SDimitry Andric       continue;
1140fca6ea1SDimitry Andric 
1150fca6ea1SDimitry Andric     // Only handle instructions with one def.
1160fca6ea1SDimitry Andric     if (MI->getNumExplicitDefs() != 1)
1170fca6ea1SDimitry Andric       return false;
1180fca6ea1SDimitry Andric 
1190fca6ea1SDimitry Andric     Register DestReg = MI->getOperand(0).getReg();
1200fca6ea1SDimitry Andric     if (!DestReg.isVirtual())
1210fca6ea1SDimitry Andric       return false;
1220fca6ea1SDimitry Andric 
1230fca6ea1SDimitry Andric     for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) {
1240fca6ea1SDimitry Andric       const MachineInstr *UserMI = UserOp.getParent();
1250fca6ea1SDimitry Andric       unsigned OpIdx = UserOp.getOperandNo();
1260fca6ea1SDimitry Andric 
1270fca6ea1SDimitry Andric       switch (UserMI->getOpcode()) {
1280fca6ea1SDimitry Andric       default:
1290fca6ea1SDimitry Andric         // TODO: Add vector
1300fca6ea1SDimitry Andric         return false;
1310fca6ea1SDimitry Andric 
1320fca6ea1SDimitry Andric       case LoongArch::ADD_W:
1330fca6ea1SDimitry Andric       case LoongArch::ADDI_W:
1340fca6ea1SDimitry Andric       case LoongArch::SUB_W:
1350fca6ea1SDimitry Andric       case LoongArch::ALSL_W:
1360fca6ea1SDimitry Andric       case LoongArch::ALSL_WU:
1370fca6ea1SDimitry Andric       case LoongArch::MUL_W:
1380fca6ea1SDimitry Andric       case LoongArch::MULH_W:
1390fca6ea1SDimitry Andric       case LoongArch::MULH_WU:
1400fca6ea1SDimitry Andric       case LoongArch::MULW_D_W:
1410fca6ea1SDimitry Andric       case LoongArch::MULW_D_WU:
1420fca6ea1SDimitry Andric       // TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+.
1430fca6ea1SDimitry Andric       // case LoongArch::DIV_W:
1440fca6ea1SDimitry Andric       // case LoongArch::DIV_WU:
1450fca6ea1SDimitry Andric       // case LoongArch::MOD_W:
1460fca6ea1SDimitry Andric       // case LoongArch::MOD_WU:
1470fca6ea1SDimitry Andric       case LoongArch::SLL_W:
1480fca6ea1SDimitry Andric       case LoongArch::SLLI_W:
1490fca6ea1SDimitry Andric       case LoongArch::SRL_W:
1500fca6ea1SDimitry Andric       case LoongArch::SRLI_W:
1510fca6ea1SDimitry Andric       case LoongArch::SRA_W:
1520fca6ea1SDimitry Andric       case LoongArch::SRAI_W:
1530fca6ea1SDimitry Andric       case LoongArch::ROTR_W:
1540fca6ea1SDimitry Andric       case LoongArch::ROTRI_W:
1550fca6ea1SDimitry Andric       case LoongArch::CLO_W:
1560fca6ea1SDimitry Andric       case LoongArch::CLZ_W:
1570fca6ea1SDimitry Andric       case LoongArch::CTO_W:
1580fca6ea1SDimitry Andric       case LoongArch::CTZ_W:
1590fca6ea1SDimitry Andric       case LoongArch::BYTEPICK_W:
1600fca6ea1SDimitry Andric       case LoongArch::REVB_2H:
1610fca6ea1SDimitry Andric       case LoongArch::BITREV_4B:
1620fca6ea1SDimitry Andric       case LoongArch::BITREV_W:
1630fca6ea1SDimitry Andric       case LoongArch::BSTRINS_W:
1640fca6ea1SDimitry Andric       case LoongArch::BSTRPICK_W:
1650fca6ea1SDimitry Andric       case LoongArch::CRC_W_W_W:
1660fca6ea1SDimitry Andric       case LoongArch::CRCC_W_W_W:
1670fca6ea1SDimitry Andric       case LoongArch::MOVGR2FCSR:
1680fca6ea1SDimitry Andric       case LoongArch::MOVGR2FRH_W:
1690fca6ea1SDimitry Andric       case LoongArch::MOVGR2FR_W_64:
1700fca6ea1SDimitry Andric         if (Bits >= 32)
1710fca6ea1SDimitry Andric           break;
1720fca6ea1SDimitry Andric         return false;
1730fca6ea1SDimitry Andric       case LoongArch::MOVGR2CF:
1740fca6ea1SDimitry Andric         if (Bits >= 1)
1750fca6ea1SDimitry Andric           break;
1760fca6ea1SDimitry Andric         return false;
1770fca6ea1SDimitry Andric       case LoongArch::EXT_W_B:
1780fca6ea1SDimitry Andric         if (Bits >= 8)
1790fca6ea1SDimitry Andric           break;
1800fca6ea1SDimitry Andric         return false;
1810fca6ea1SDimitry Andric       case LoongArch::EXT_W_H:
1820fca6ea1SDimitry Andric         if (Bits >= 16)
1830fca6ea1SDimitry Andric           break;
1840fca6ea1SDimitry Andric         return false;
1850fca6ea1SDimitry Andric 
1860fca6ea1SDimitry Andric       case LoongArch::SRLI_D: {
1870fca6ea1SDimitry Andric         // If we are shifting right by less than Bits, and users don't demand
1880fca6ea1SDimitry Andric         // any bits that were shifted into [Bits-1:0], then we can consider this
1890fca6ea1SDimitry Andric         // as an N-Bit user.
1900fca6ea1SDimitry Andric         unsigned ShAmt = UserMI->getOperand(2).getImm();
1910fca6ea1SDimitry Andric         if (Bits > ShAmt) {
1920fca6ea1SDimitry Andric           Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));
1930fca6ea1SDimitry Andric           break;
1940fca6ea1SDimitry Andric         }
1950fca6ea1SDimitry Andric         return false;
1960fca6ea1SDimitry Andric       }
1970fca6ea1SDimitry Andric 
1980fca6ea1SDimitry Andric       // these overwrite higher input bits, otherwise the lower word of output
1990fca6ea1SDimitry Andric       // depends only on the lower word of input. So check their uses read W.
2000fca6ea1SDimitry Andric       case LoongArch::SLLI_D:
2010fca6ea1SDimitry Andric         if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm()))
2020fca6ea1SDimitry Andric           break;
2030fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
2040fca6ea1SDimitry Andric         break;
2050fca6ea1SDimitry Andric       case LoongArch::ANDI: {
2060fca6ea1SDimitry Andric         uint64_t Imm = UserMI->getOperand(2).getImm();
2070fca6ea1SDimitry Andric         if (Bits >= (unsigned)llvm::bit_width(Imm))
2080fca6ea1SDimitry Andric           break;
2090fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
2100fca6ea1SDimitry Andric         break;
2110fca6ea1SDimitry Andric       }
2120fca6ea1SDimitry Andric       case LoongArch::ORI: {
2130fca6ea1SDimitry Andric         uint64_t Imm = UserMI->getOperand(2).getImm();
2140fca6ea1SDimitry Andric         if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
2150fca6ea1SDimitry Andric           break;
2160fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
2170fca6ea1SDimitry Andric         break;
2180fca6ea1SDimitry Andric       }
2190fca6ea1SDimitry Andric 
2200fca6ea1SDimitry Andric       case LoongArch::SLL_D:
2210fca6ea1SDimitry Andric         // Operand 2 is the shift amount which uses log2(grlen) bits.
2220fca6ea1SDimitry Andric         if (OpIdx == 2) {
2230fca6ea1SDimitry Andric           if (Bits >= Log2_32(ST.getGRLen()))
2240fca6ea1SDimitry Andric             break;
2250fca6ea1SDimitry Andric           return false;
2260fca6ea1SDimitry Andric         }
2270fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
2280fca6ea1SDimitry Andric         break;
2290fca6ea1SDimitry Andric 
2300fca6ea1SDimitry Andric       case LoongArch::SRA_D:
2310fca6ea1SDimitry Andric       case LoongArch::SRL_D:
2320fca6ea1SDimitry Andric       case LoongArch::ROTR_D:
2330fca6ea1SDimitry Andric         // Operand 2 is the shift amount which uses 6 bits.
2340fca6ea1SDimitry Andric         if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen()))
2350fca6ea1SDimitry Andric           break;
2360fca6ea1SDimitry Andric         return false;
2370fca6ea1SDimitry Andric 
2380fca6ea1SDimitry Andric       case LoongArch::ST_B:
2390fca6ea1SDimitry Andric       case LoongArch::STX_B:
2400fca6ea1SDimitry Andric       case LoongArch::STGT_B:
2410fca6ea1SDimitry Andric       case LoongArch::STLE_B:
2420fca6ea1SDimitry Andric       case LoongArch::IOCSRWR_B:
2430fca6ea1SDimitry Andric         // The first argument is the value to store.
2440fca6ea1SDimitry Andric         if (OpIdx == 0 && Bits >= 8)
2450fca6ea1SDimitry Andric           break;
2460fca6ea1SDimitry Andric         return false;
2470fca6ea1SDimitry Andric       case LoongArch::ST_H:
2480fca6ea1SDimitry Andric       case LoongArch::STX_H:
2490fca6ea1SDimitry Andric       case LoongArch::STGT_H:
2500fca6ea1SDimitry Andric       case LoongArch::STLE_H:
2510fca6ea1SDimitry Andric       case LoongArch::IOCSRWR_H:
2520fca6ea1SDimitry Andric         // The first argument is the value to store.
2530fca6ea1SDimitry Andric         if (OpIdx == 0 && Bits >= 16)
2540fca6ea1SDimitry Andric           break;
2550fca6ea1SDimitry Andric         return false;
2560fca6ea1SDimitry Andric       case LoongArch::ST_W:
2570fca6ea1SDimitry Andric       case LoongArch::STX_W:
2580fca6ea1SDimitry Andric       case LoongArch::SCREL_W:
2590fca6ea1SDimitry Andric       case LoongArch::STPTR_W:
2600fca6ea1SDimitry Andric       case LoongArch::STGT_W:
2610fca6ea1SDimitry Andric       case LoongArch::STLE_W:
2620fca6ea1SDimitry Andric       case LoongArch::IOCSRWR_W:
2630fca6ea1SDimitry Andric         // The first argument is the value to store.
2640fca6ea1SDimitry Andric         if (OpIdx == 0 && Bits >= 32)
2650fca6ea1SDimitry Andric           break;
2660fca6ea1SDimitry Andric         return false;
2670fca6ea1SDimitry Andric 
2680fca6ea1SDimitry Andric       case LoongArch::CRC_W_B_W:
2690fca6ea1SDimitry Andric       case LoongArch::CRCC_W_B_W:
2700fca6ea1SDimitry Andric         if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32))
2710fca6ea1SDimitry Andric           break;
2720fca6ea1SDimitry Andric         return false;
2730fca6ea1SDimitry Andric       case LoongArch::CRC_W_H_W:
2740fca6ea1SDimitry Andric       case LoongArch::CRCC_W_H_W:
2750fca6ea1SDimitry Andric         if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32))
2760fca6ea1SDimitry Andric           break;
2770fca6ea1SDimitry Andric         return false;
2780fca6ea1SDimitry Andric       case LoongArch::CRC_W_D_W:
2790fca6ea1SDimitry Andric       case LoongArch::CRCC_W_D_W:
2800fca6ea1SDimitry Andric         if (OpIdx == 2 && Bits >= 32)
2810fca6ea1SDimitry Andric           break;
2820fca6ea1SDimitry Andric         return false;
2830fca6ea1SDimitry Andric 
2840fca6ea1SDimitry Andric       // For these, lower word of output in these operations, depends only on
2850fca6ea1SDimitry Andric       // the lower word of input. So, we check all uses only read lower word.
2860fca6ea1SDimitry Andric       case LoongArch::COPY:
2870fca6ea1SDimitry Andric       case LoongArch::PHI:
2880fca6ea1SDimitry Andric       case LoongArch::ADD_D:
2890fca6ea1SDimitry Andric       case LoongArch::ADDI_D:
2900fca6ea1SDimitry Andric       case LoongArch::SUB_D:
2910fca6ea1SDimitry Andric       case LoongArch::MUL_D:
2920fca6ea1SDimitry Andric       case LoongArch::AND:
2930fca6ea1SDimitry Andric       case LoongArch::OR:
2940fca6ea1SDimitry Andric       case LoongArch::NOR:
2950fca6ea1SDimitry Andric       case LoongArch::XOR:
2960fca6ea1SDimitry Andric       case LoongArch::XORI:
2970fca6ea1SDimitry Andric       case LoongArch::ANDN:
2980fca6ea1SDimitry Andric       case LoongArch::ORN:
2990fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
3000fca6ea1SDimitry Andric         break;
3010fca6ea1SDimitry Andric 
3020fca6ea1SDimitry Andric       case LoongArch::MASKNEZ:
3030fca6ea1SDimitry Andric       case LoongArch::MASKEQZ:
3040fca6ea1SDimitry Andric         if (OpIdx != 1)
3050fca6ea1SDimitry Andric           return false;
3060fca6ea1SDimitry Andric         Worklist.push_back(std::make_pair(UserMI, Bits));
3070fca6ea1SDimitry Andric         break;
3080fca6ea1SDimitry Andric       }
3090fca6ea1SDimitry Andric     }
3100fca6ea1SDimitry Andric   }
3110fca6ea1SDimitry Andric 
3120fca6ea1SDimitry Andric   return true;
3130fca6ea1SDimitry Andric }
3140fca6ea1SDimitry Andric 
3150fca6ea1SDimitry Andric static bool hasAllWUsers(const MachineInstr &OrigMI,
3160fca6ea1SDimitry Andric                          const LoongArchSubtarget &ST,
3170fca6ea1SDimitry Andric                          const MachineRegisterInfo &MRI) {
3180fca6ea1SDimitry Andric   return hasAllNBitUsers(OrigMI, ST, MRI, 32);
3190fca6ea1SDimitry Andric }
3200fca6ea1SDimitry Andric 
3210fca6ea1SDimitry Andric // This function returns true if the machine instruction always outputs a value
3220fca6ea1SDimitry Andric // where bits 63:32 match bit 31.
3230fca6ea1SDimitry Andric static bool isSignExtendingOpW(const MachineInstr &MI,
3240fca6ea1SDimitry Andric                                const MachineRegisterInfo &MRI, unsigned OpNo) {
3250fca6ea1SDimitry Andric   switch (MI.getOpcode()) {
3260fca6ea1SDimitry Andric   // Normal cases
3270fca6ea1SDimitry Andric   case LoongArch::ADD_W:
3280fca6ea1SDimitry Andric   case LoongArch::SUB_W:
3290fca6ea1SDimitry Andric   case LoongArch::ADDI_W:
3300fca6ea1SDimitry Andric   case LoongArch::ALSL_W:
3310fca6ea1SDimitry Andric   case LoongArch::LU12I_W:
3320fca6ea1SDimitry Andric   case LoongArch::SLT:
3330fca6ea1SDimitry Andric   case LoongArch::SLTU:
3340fca6ea1SDimitry Andric   case LoongArch::SLTI:
3350fca6ea1SDimitry Andric   case LoongArch::SLTUI:
3360fca6ea1SDimitry Andric   case LoongArch::ANDI:
3370fca6ea1SDimitry Andric   case LoongArch::MUL_W:
3380fca6ea1SDimitry Andric   case LoongArch::MULH_W:
3390fca6ea1SDimitry Andric   case LoongArch::MULH_WU:
3400fca6ea1SDimitry Andric   case LoongArch::DIV_W:
3410fca6ea1SDimitry Andric   case LoongArch::MOD_W:
3420fca6ea1SDimitry Andric   case LoongArch::DIV_WU:
3430fca6ea1SDimitry Andric   case LoongArch::MOD_WU:
3440fca6ea1SDimitry Andric   case LoongArch::SLL_W:
3450fca6ea1SDimitry Andric   case LoongArch::SRL_W:
3460fca6ea1SDimitry Andric   case LoongArch::SRA_W:
3470fca6ea1SDimitry Andric   case LoongArch::ROTR_W:
3480fca6ea1SDimitry Andric   case LoongArch::SLLI_W:
3490fca6ea1SDimitry Andric   case LoongArch::SRLI_W:
3500fca6ea1SDimitry Andric   case LoongArch::SRAI_W:
3510fca6ea1SDimitry Andric   case LoongArch::ROTRI_W:
3520fca6ea1SDimitry Andric   case LoongArch::EXT_W_B:
3530fca6ea1SDimitry Andric   case LoongArch::EXT_W_H:
3540fca6ea1SDimitry Andric   case LoongArch::CLO_W:
3550fca6ea1SDimitry Andric   case LoongArch::CLZ_W:
3560fca6ea1SDimitry Andric   case LoongArch::CTO_W:
3570fca6ea1SDimitry Andric   case LoongArch::CTZ_W:
3580fca6ea1SDimitry Andric   case LoongArch::BYTEPICK_W:
3590fca6ea1SDimitry Andric   case LoongArch::REVB_2H:
3600fca6ea1SDimitry Andric   case LoongArch::BITREV_4B:
3610fca6ea1SDimitry Andric   case LoongArch::BITREV_W:
3620fca6ea1SDimitry Andric   case LoongArch::BSTRINS_W:
3630fca6ea1SDimitry Andric   case LoongArch::BSTRPICK_W:
3640fca6ea1SDimitry Andric   case LoongArch::LD_B:
3650fca6ea1SDimitry Andric   case LoongArch::LD_H:
3660fca6ea1SDimitry Andric   case LoongArch::LD_W:
3670fca6ea1SDimitry Andric   case LoongArch::LD_BU:
3680fca6ea1SDimitry Andric   case LoongArch::LD_HU:
3690fca6ea1SDimitry Andric   case LoongArch::LL_W:
3700fca6ea1SDimitry Andric   case LoongArch::LLACQ_W:
3710fca6ea1SDimitry Andric   case LoongArch::RDTIMEL_W:
3720fca6ea1SDimitry Andric   case LoongArch::RDTIMEH_W:
3730fca6ea1SDimitry Andric   case LoongArch::CPUCFG:
3740fca6ea1SDimitry Andric   case LoongArch::LDX_B:
3750fca6ea1SDimitry Andric   case LoongArch::LDX_H:
3760fca6ea1SDimitry Andric   case LoongArch::LDX_W:
3770fca6ea1SDimitry Andric   case LoongArch::LDX_BU:
3780fca6ea1SDimitry Andric   case LoongArch::LDX_HU:
3790fca6ea1SDimitry Andric   case LoongArch::LDPTR_W:
3800fca6ea1SDimitry Andric   case LoongArch::LDGT_B:
3810fca6ea1SDimitry Andric   case LoongArch::LDGT_H:
3820fca6ea1SDimitry Andric   case LoongArch::LDGT_W:
3830fca6ea1SDimitry Andric   case LoongArch::LDLE_B:
3840fca6ea1SDimitry Andric   case LoongArch::LDLE_H:
3850fca6ea1SDimitry Andric   case LoongArch::LDLE_W:
3860fca6ea1SDimitry Andric   case LoongArch::AMSWAP_B:
3870fca6ea1SDimitry Andric   case LoongArch::AMSWAP_H:
3880fca6ea1SDimitry Andric   case LoongArch::AMSWAP_W:
3890fca6ea1SDimitry Andric   case LoongArch::AMADD_B:
3900fca6ea1SDimitry Andric   case LoongArch::AMADD_H:
3910fca6ea1SDimitry Andric   case LoongArch::AMADD_W:
3920fca6ea1SDimitry Andric   case LoongArch::AMAND_W:
3930fca6ea1SDimitry Andric   case LoongArch::AMOR_W:
3940fca6ea1SDimitry Andric   case LoongArch::AMXOR_W:
3950fca6ea1SDimitry Andric   case LoongArch::AMMAX_W:
3960fca6ea1SDimitry Andric   case LoongArch::AMMIN_W:
3970fca6ea1SDimitry Andric   case LoongArch::AMMAX_WU:
3980fca6ea1SDimitry Andric   case LoongArch::AMMIN_WU:
3990fca6ea1SDimitry Andric   case LoongArch::AMSWAP__DB_B:
4000fca6ea1SDimitry Andric   case LoongArch::AMSWAP__DB_H:
4010fca6ea1SDimitry Andric   case LoongArch::AMSWAP__DB_W:
4020fca6ea1SDimitry Andric   case LoongArch::AMADD__DB_B:
4030fca6ea1SDimitry Andric   case LoongArch::AMADD__DB_H:
4040fca6ea1SDimitry Andric   case LoongArch::AMADD__DB_W:
4050fca6ea1SDimitry Andric   case LoongArch::AMAND__DB_W:
4060fca6ea1SDimitry Andric   case LoongArch::AMOR__DB_W:
4070fca6ea1SDimitry Andric   case LoongArch::AMXOR__DB_W:
4080fca6ea1SDimitry Andric   case LoongArch::AMMAX__DB_W:
4090fca6ea1SDimitry Andric   case LoongArch::AMMIN__DB_W:
4100fca6ea1SDimitry Andric   case LoongArch::AMMAX__DB_WU:
4110fca6ea1SDimitry Andric   case LoongArch::AMMIN__DB_WU:
4120fca6ea1SDimitry Andric   case LoongArch::AMCAS_B:
4130fca6ea1SDimitry Andric   case LoongArch::AMCAS_H:
4140fca6ea1SDimitry Andric   case LoongArch::AMCAS_W:
4150fca6ea1SDimitry Andric   case LoongArch::AMCAS__DB_B:
4160fca6ea1SDimitry Andric   case LoongArch::AMCAS__DB_H:
4170fca6ea1SDimitry Andric   case LoongArch::AMCAS__DB_W:
4180fca6ea1SDimitry Andric   case LoongArch::CRC_W_B_W:
4190fca6ea1SDimitry Andric   case LoongArch::CRC_W_H_W:
4200fca6ea1SDimitry Andric   case LoongArch::CRC_W_W_W:
4210fca6ea1SDimitry Andric   case LoongArch::CRC_W_D_W:
4220fca6ea1SDimitry Andric   case LoongArch::CRCC_W_B_W:
4230fca6ea1SDimitry Andric   case LoongArch::CRCC_W_H_W:
4240fca6ea1SDimitry Andric   case LoongArch::CRCC_W_W_W:
4250fca6ea1SDimitry Andric   case LoongArch::CRCC_W_D_W:
4260fca6ea1SDimitry Andric   case LoongArch::IOCSRRD_B:
4270fca6ea1SDimitry Andric   case LoongArch::IOCSRRD_H:
4280fca6ea1SDimitry Andric   case LoongArch::IOCSRRD_W:
4290fca6ea1SDimitry Andric   case LoongArch::MOVFR2GR_S:
4300fca6ea1SDimitry Andric   case LoongArch::MOVFCSR2GR:
4310fca6ea1SDimitry Andric   case LoongArch::MOVCF2GR:
4320fca6ea1SDimitry Andric   case LoongArch::MOVFRH2GR_S:
4330fca6ea1SDimitry Andric   case LoongArch::MOVFR2GR_S_64:
4340fca6ea1SDimitry Andric     // TODO: Add vector
4350fca6ea1SDimitry Andric     return true;
4360fca6ea1SDimitry Andric   // Special cases that require checking operands.
4370fca6ea1SDimitry Andric   // shifting right sufficiently makes the value 32-bit sign-extended
4380fca6ea1SDimitry Andric   case LoongArch::SRAI_D:
4390fca6ea1SDimitry Andric     return MI.getOperand(2).getImm() >= 32;
4400fca6ea1SDimitry Andric   case LoongArch::SRLI_D:
4410fca6ea1SDimitry Andric     return MI.getOperand(2).getImm() > 32;
4420fca6ea1SDimitry Andric   // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.
4430fca6ea1SDimitry Andric   case LoongArch::ADDI_D:
4440fca6ea1SDimitry Andric   case LoongArch::ORI:
4450fca6ea1SDimitry Andric     return MI.getOperand(1).isReg() &&
4460fca6ea1SDimitry Andric            MI.getOperand(1).getReg() == LoongArch::R0;
4470fca6ea1SDimitry Andric   // A bits extract is sign extended if the msb is less than 31.
4480fca6ea1SDimitry Andric   case LoongArch::BSTRPICK_D:
4490fca6ea1SDimitry Andric     return MI.getOperand(2).getImm() < 31;
4500fca6ea1SDimitry Andric   // Copying from R0 produces zero.
4510fca6ea1SDimitry Andric   case LoongArch::COPY:
4520fca6ea1SDimitry Andric     return MI.getOperand(1).getReg() == LoongArch::R0;
4530fca6ea1SDimitry Andric   // Ignore the scratch register destination.
4540fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicSwap32:
4550fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicSwap32:
4560fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadAdd32:
4570fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadSub32:
4580fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadNand32:
4590fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadNand32:
4600fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadAdd32:
4610fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadSub32:
4620fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadAnd32:
4630fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadOr32:
4640fca6ea1SDimitry Andric   case LoongArch::PseudoAtomicLoadXor32:
4650fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadUMax32:
4660fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadUMin32:
4670fca6ea1SDimitry Andric   case LoongArch::PseudoCmpXchg32:
4680fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedCmpXchg32:
4690fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadMax32:
4700fca6ea1SDimitry Andric   case LoongArch::PseudoMaskedAtomicLoadMin32:
4710fca6ea1SDimitry Andric     return OpNo == 0;
4720fca6ea1SDimitry Andric   }
4730fca6ea1SDimitry Andric 
4740fca6ea1SDimitry Andric   return false;
4750fca6ea1SDimitry Andric }
4760fca6ea1SDimitry Andric 
4770fca6ea1SDimitry Andric static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
4780fca6ea1SDimitry Andric                             const MachineRegisterInfo &MRI,
4790fca6ea1SDimitry Andric                             SmallPtrSetImpl<MachineInstr *> &FixableDef) {
4800fca6ea1SDimitry Andric   SmallSet<Register, 4> Visited;
4810fca6ea1SDimitry Andric   SmallVector<Register, 4> Worklist;
4820fca6ea1SDimitry Andric 
4830fca6ea1SDimitry Andric   auto AddRegToWorkList = [&](Register SrcReg) {
4840fca6ea1SDimitry Andric     if (!SrcReg.isVirtual())
4850fca6ea1SDimitry Andric       return false;
4860fca6ea1SDimitry Andric     Worklist.push_back(SrcReg);
4870fca6ea1SDimitry Andric     return true;
4880fca6ea1SDimitry Andric   };
4890fca6ea1SDimitry Andric 
4900fca6ea1SDimitry Andric   if (!AddRegToWorkList(SrcReg))
4910fca6ea1SDimitry Andric     return false;
4920fca6ea1SDimitry Andric 
4930fca6ea1SDimitry Andric   while (!Worklist.empty()) {
4940fca6ea1SDimitry Andric     Register Reg = Worklist.pop_back_val();
4950fca6ea1SDimitry Andric 
4960fca6ea1SDimitry Andric     // If we already visited this register, we don't need to check it again.
4970fca6ea1SDimitry Andric     if (!Visited.insert(Reg).second)
4980fca6ea1SDimitry Andric       continue;
4990fca6ea1SDimitry Andric 
5000fca6ea1SDimitry Andric     MachineInstr *MI = MRI.getVRegDef(Reg);
5010fca6ea1SDimitry Andric     if (!MI)
5020fca6ea1SDimitry Andric       continue;
5030fca6ea1SDimitry Andric 
5040fca6ea1SDimitry Andric     int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
5050fca6ea1SDimitry Andric     assert(OpNo != -1 && "Couldn't find register");
5060fca6ea1SDimitry Andric 
5070fca6ea1SDimitry Andric     // If this is a sign extending operation we don't need to look any further.
5080fca6ea1SDimitry Andric     if (isSignExtendingOpW(*MI, MRI, OpNo))
5090fca6ea1SDimitry Andric       continue;
5100fca6ea1SDimitry Andric 
5110fca6ea1SDimitry Andric     // Is this an instruction that propagates sign extend?
5120fca6ea1SDimitry Andric     switch (MI->getOpcode()) {
5130fca6ea1SDimitry Andric     default:
5140fca6ea1SDimitry Andric       // Unknown opcode, give up.
5150fca6ea1SDimitry Andric       return false;
5160fca6ea1SDimitry Andric     case LoongArch::COPY: {
5170fca6ea1SDimitry Andric       const MachineFunction *MF = MI->getMF();
5180fca6ea1SDimitry Andric       const LoongArchMachineFunctionInfo *LAFI =
5190fca6ea1SDimitry Andric           MF->getInfo<LoongArchMachineFunctionInfo>();
5200fca6ea1SDimitry Andric 
5210fca6ea1SDimitry Andric       // If this is the entry block and the register is livein, see if we know
5220fca6ea1SDimitry Andric       // it is sign extended.
5230fca6ea1SDimitry Andric       if (MI->getParent() == &MF->front()) {
5240fca6ea1SDimitry Andric         Register VReg = MI->getOperand(0).getReg();
5250fca6ea1SDimitry Andric         if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg))
5260fca6ea1SDimitry Andric           continue;
5270fca6ea1SDimitry Andric       }
5280fca6ea1SDimitry Andric 
5290fca6ea1SDimitry Andric       Register CopySrcReg = MI->getOperand(1).getReg();
5300fca6ea1SDimitry Andric       if (CopySrcReg == LoongArch::R4) {
5310fca6ea1SDimitry Andric         // For a method return value, we check the ZExt/SExt flags in attribute.
5320fca6ea1SDimitry Andric         // We assume the following code sequence for method call.
5330fca6ea1SDimitry Andric         // PseudoCALL @bar, ...
5340fca6ea1SDimitry Andric         // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
5350fca6ea1SDimitry Andric         // %0:gpr = COPY $r4
5360fca6ea1SDimitry Andric         //
5370fca6ea1SDimitry Andric         // We use the PseudoCall to look up the IR function being called to find
5380fca6ea1SDimitry Andric         // its return attributes.
5390fca6ea1SDimitry Andric         const MachineBasicBlock *MBB = MI->getParent();
5400fca6ea1SDimitry Andric         auto II = MI->getIterator();
5410fca6ea1SDimitry Andric         if (II == MBB->instr_begin() ||
5420fca6ea1SDimitry Andric             (--II)->getOpcode() != LoongArch::ADJCALLSTACKUP)
5430fca6ea1SDimitry Andric           return false;
5440fca6ea1SDimitry Andric 
5450fca6ea1SDimitry Andric         const MachineInstr &CallMI = *(--II);
5460fca6ea1SDimitry Andric         if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5470fca6ea1SDimitry Andric           return false;
5480fca6ea1SDimitry Andric 
5490fca6ea1SDimitry Andric         auto *CalleeFn =
5500fca6ea1SDimitry Andric             dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5510fca6ea1SDimitry Andric         if (!CalleeFn)
5520fca6ea1SDimitry Andric           return false;
5530fca6ea1SDimitry Andric 
5540fca6ea1SDimitry Andric         auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5550fca6ea1SDimitry Andric         if (!IntTy)
5560fca6ea1SDimitry Andric           return false;
5570fca6ea1SDimitry Andric 
5580fca6ea1SDimitry Andric         const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5590fca6ea1SDimitry Andric         unsigned BitWidth = IntTy->getBitWidth();
5600fca6ea1SDimitry Andric         if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
5610fca6ea1SDimitry Andric             (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
5620fca6ea1SDimitry Andric           continue;
5630fca6ea1SDimitry Andric       }
5640fca6ea1SDimitry Andric 
5650fca6ea1SDimitry Andric       if (!AddRegToWorkList(CopySrcReg))
5660fca6ea1SDimitry Andric         return false;
5670fca6ea1SDimitry Andric 
5680fca6ea1SDimitry Andric       break;
5690fca6ea1SDimitry Andric     }
5700fca6ea1SDimitry Andric 
5710fca6ea1SDimitry Andric     // For these, we just need to check if the 1st operand is sign extended.
5720fca6ea1SDimitry Andric     case LoongArch::MOD_D:
5730fca6ea1SDimitry Andric     case LoongArch::ANDI:
5740fca6ea1SDimitry Andric     case LoongArch::ORI:
5750fca6ea1SDimitry Andric     case LoongArch::XORI:
5760fca6ea1SDimitry Andric       // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
5770fca6ea1SDimitry Andric       // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
5780fca6ea1SDimitry Andric       // Logical operations use a sign extended 12-bit immediate.
5790fca6ea1SDimitry Andric       if (!AddRegToWorkList(MI->getOperand(1).getReg()))
5800fca6ea1SDimitry Andric         return false;
5810fca6ea1SDimitry Andric 
5820fca6ea1SDimitry Andric       break;
5830fca6ea1SDimitry Andric     case LoongArch::MOD_DU:
5840fca6ea1SDimitry Andric     case LoongArch::AND:
5850fca6ea1SDimitry Andric     case LoongArch::OR:
5860fca6ea1SDimitry Andric     case LoongArch::XOR:
5870fca6ea1SDimitry Andric     case LoongArch::ANDN:
5880fca6ea1SDimitry Andric     case LoongArch::ORN:
5890fca6ea1SDimitry Andric     case LoongArch::PHI: {
5900fca6ea1SDimitry Andric       // If all incoming values are sign-extended, the output of AND, OR, XOR,
5910fca6ea1SDimitry Andric       // or PHI is also sign-extended.
5920fca6ea1SDimitry Andric 
5930fca6ea1SDimitry Andric       // The input registers for PHI are operand 1, 3, ...
5940fca6ea1SDimitry Andric       // The input registers for others are operand 1 and 2.
5950fca6ea1SDimitry Andric       unsigned B = 1, E = 3, D = 1;
5960fca6ea1SDimitry Andric       switch (MI->getOpcode()) {
5970fca6ea1SDimitry Andric       case LoongArch::PHI:
5980fca6ea1SDimitry Andric         E = MI->getNumOperands();
5990fca6ea1SDimitry Andric         D = 2;
6000fca6ea1SDimitry Andric         break;
6010fca6ea1SDimitry Andric       }
6020fca6ea1SDimitry Andric 
6030fca6ea1SDimitry Andric       for (unsigned I = B; I != E; I += D) {
6040fca6ea1SDimitry Andric         if (!MI->getOperand(I).isReg())
6050fca6ea1SDimitry Andric           return false;
6060fca6ea1SDimitry Andric 
6070fca6ea1SDimitry Andric         if (!AddRegToWorkList(MI->getOperand(I).getReg()))
6080fca6ea1SDimitry Andric           return false;
6090fca6ea1SDimitry Andric       }
6100fca6ea1SDimitry Andric 
6110fca6ea1SDimitry Andric       break;
6120fca6ea1SDimitry Andric     }
6130fca6ea1SDimitry Andric 
6140fca6ea1SDimitry Andric     case LoongArch::MASKEQZ:
6150fca6ea1SDimitry Andric     case LoongArch::MASKNEZ:
6160fca6ea1SDimitry Andric       // Instructions return zero or operand 1. Result is sign extended if
6170fca6ea1SDimitry Andric       // operand 1 is sign extended.
6180fca6ea1SDimitry Andric       if (!AddRegToWorkList(MI->getOperand(1).getReg()))
6190fca6ea1SDimitry Andric         return false;
6200fca6ea1SDimitry Andric       break;
6210fca6ea1SDimitry Andric 
6220fca6ea1SDimitry Andric     // With these opcode, we can "fix" them with the W-version
6230fca6ea1SDimitry Andric     // if we know all users of the result only rely on bits 31:0
6240fca6ea1SDimitry Andric     case LoongArch::SLLI_D:
6250fca6ea1SDimitry Andric       // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits
6260fca6ea1SDimitry Andric       if (MI->getOperand(2).getImm() >= 32)
6270fca6ea1SDimitry Andric         return false;
6280fca6ea1SDimitry Andric       [[fallthrough]];
6290fca6ea1SDimitry Andric     case LoongArch::ADDI_D:
6300fca6ea1SDimitry Andric     case LoongArch::ADD_D:
6310fca6ea1SDimitry Andric     case LoongArch::LD_D:
6320fca6ea1SDimitry Andric     case LoongArch::LD_WU:
6330fca6ea1SDimitry Andric     case LoongArch::MUL_D:
6340fca6ea1SDimitry Andric     case LoongArch::SUB_D:
6350fca6ea1SDimitry Andric       if (hasAllWUsers(*MI, ST, MRI)) {
6360fca6ea1SDimitry Andric         FixableDef.insert(MI);
6370fca6ea1SDimitry Andric         break;
6380fca6ea1SDimitry Andric       }
6390fca6ea1SDimitry Andric       return false;
640*6e516c87SDimitry Andric     // If all incoming values are sign-extended and all users only use
641*6e516c87SDimitry Andric     // the lower 32 bits, then convert them to W versions.
642*6e516c87SDimitry Andric     case LoongArch::DIV_D: {
643*6e516c87SDimitry Andric       if (!AddRegToWorkList(MI->getOperand(1).getReg()))
644*6e516c87SDimitry Andric         return false;
645*6e516c87SDimitry Andric       if (!AddRegToWorkList(MI->getOperand(2).getReg()))
646*6e516c87SDimitry Andric         return false;
647*6e516c87SDimitry Andric       if (hasAllWUsers(*MI, ST, MRI)) {
648*6e516c87SDimitry Andric         FixableDef.insert(MI);
649*6e516c87SDimitry Andric         break;
650*6e516c87SDimitry Andric       }
651*6e516c87SDimitry Andric       return false;
652*6e516c87SDimitry Andric     }
6530fca6ea1SDimitry Andric     }
6540fca6ea1SDimitry Andric   }
6550fca6ea1SDimitry Andric 
6560fca6ea1SDimitry Andric   // If we get here, then every node we visited produces a sign extended value
6570fca6ea1SDimitry Andric   // or propagated sign extended values. So the result must be sign extended.
6580fca6ea1SDimitry Andric   return true;
6590fca6ea1SDimitry Andric }
6600fca6ea1SDimitry Andric 
6610fca6ea1SDimitry Andric static unsigned getWOp(unsigned Opcode) {
6620fca6ea1SDimitry Andric   switch (Opcode) {
6630fca6ea1SDimitry Andric   case LoongArch::ADDI_D:
6640fca6ea1SDimitry Andric     return LoongArch::ADDI_W;
6650fca6ea1SDimitry Andric   case LoongArch::ADD_D:
6660fca6ea1SDimitry Andric     return LoongArch::ADD_W;
667*6e516c87SDimitry Andric   case LoongArch::DIV_D:
668*6e516c87SDimitry Andric     return LoongArch::DIV_W;
6690fca6ea1SDimitry Andric   case LoongArch::LD_D:
6700fca6ea1SDimitry Andric   case LoongArch::LD_WU:
6710fca6ea1SDimitry Andric     return LoongArch::LD_W;
6720fca6ea1SDimitry Andric   case LoongArch::MUL_D:
6730fca6ea1SDimitry Andric     return LoongArch::MUL_W;
6740fca6ea1SDimitry Andric   case LoongArch::SLLI_D:
6750fca6ea1SDimitry Andric     return LoongArch::SLLI_W;
6760fca6ea1SDimitry Andric   case LoongArch::SUB_D:
6770fca6ea1SDimitry Andric     return LoongArch::SUB_W;
6780fca6ea1SDimitry Andric   default:
6790fca6ea1SDimitry Andric     llvm_unreachable("Unexpected opcode for replacement with W variant");
6800fca6ea1SDimitry Andric   }
6810fca6ea1SDimitry Andric }
6820fca6ea1SDimitry Andric 
6830fca6ea1SDimitry Andric bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
6840fca6ea1SDimitry Andric                                             const LoongArchInstrInfo &TII,
6850fca6ea1SDimitry Andric                                             const LoongArchSubtarget &ST,
6860fca6ea1SDimitry Andric                                             MachineRegisterInfo &MRI) {
6870fca6ea1SDimitry Andric   if (DisableSExtWRemoval)
6880fca6ea1SDimitry Andric     return false;
6890fca6ea1SDimitry Andric 
6900fca6ea1SDimitry Andric   bool MadeChange = false;
6910fca6ea1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
6920fca6ea1SDimitry Andric     for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
6930fca6ea1SDimitry Andric       // We're looking for the sext.w pattern ADDI.W rd, rs, 0.
6940fca6ea1SDimitry Andric       if (!LoongArch::isSEXT_W(MI))
6950fca6ea1SDimitry Andric         continue;
6960fca6ea1SDimitry Andric 
6970fca6ea1SDimitry Andric       Register SrcReg = MI.getOperand(1).getReg();
6980fca6ea1SDimitry Andric 
6990fca6ea1SDimitry Andric       SmallPtrSet<MachineInstr *, 4> FixableDefs;
7000fca6ea1SDimitry Andric 
7010fca6ea1SDimitry Andric       // If all users only use the lower bits, this sext.w is redundant.
7020fca6ea1SDimitry Andric       // Or if all definitions reaching MI sign-extend their output,
7030fca6ea1SDimitry Andric       // then sext.w is redundant.
7040fca6ea1SDimitry Andric       if (!hasAllWUsers(MI, ST, MRI) &&
7050fca6ea1SDimitry Andric           !isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
7060fca6ea1SDimitry Andric         continue;
7070fca6ea1SDimitry Andric 
7080fca6ea1SDimitry Andric       Register DstReg = MI.getOperand(0).getReg();
7090fca6ea1SDimitry Andric       if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
7100fca6ea1SDimitry Andric         continue;
7110fca6ea1SDimitry Andric 
7120fca6ea1SDimitry Andric       // Convert Fixable instructions to their W versions.
7130fca6ea1SDimitry Andric       for (MachineInstr *Fixable : FixableDefs) {
7140fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
7150fca6ea1SDimitry Andric         Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode())));
7160fca6ea1SDimitry Andric         Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
7170fca6ea1SDimitry Andric         Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
7180fca6ea1SDimitry Andric         Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
7190fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "     with " << *Fixable);
7200fca6ea1SDimitry Andric         ++NumTransformedToWInstrs;
7210fca6ea1SDimitry Andric       }
7220fca6ea1SDimitry Andric 
7230fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
7240fca6ea1SDimitry Andric       MRI.replaceRegWith(DstReg, SrcReg);
7250fca6ea1SDimitry Andric       MRI.clearKillFlags(SrcReg);
7260fca6ea1SDimitry Andric       MI.eraseFromParent();
7270fca6ea1SDimitry Andric       ++NumRemovedSExtW;
7280fca6ea1SDimitry Andric       MadeChange = true;
7290fca6ea1SDimitry Andric     }
7300fca6ea1SDimitry Andric   }
7310fca6ea1SDimitry Andric 
7320fca6ea1SDimitry Andric   return MadeChange;
7330fca6ea1SDimitry Andric }
7340fca6ea1SDimitry Andric 
7350fca6ea1SDimitry Andric bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF,
7360fca6ea1SDimitry Andric                                              const LoongArchInstrInfo &TII,
7370fca6ea1SDimitry Andric                                              const LoongArchSubtarget &ST,
7380fca6ea1SDimitry Andric                                              MachineRegisterInfo &MRI) {
7390fca6ea1SDimitry Andric   bool MadeChange = false;
7400fca6ea1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
7410fca6ea1SDimitry Andric     for (MachineInstr &MI : MBB) {
7420fca6ea1SDimitry Andric       unsigned Opc;
7430fca6ea1SDimitry Andric       switch (MI.getOpcode()) {
7440fca6ea1SDimitry Andric       default:
7450fca6ea1SDimitry Andric         continue;
7460fca6ea1SDimitry Andric       case LoongArch::ADDI_W:
7470fca6ea1SDimitry Andric         Opc = LoongArch::ADDI_D;
7480fca6ea1SDimitry Andric         break;
7490fca6ea1SDimitry Andric       }
7500fca6ea1SDimitry Andric 
7510fca6ea1SDimitry Andric       if (hasAllWUsers(MI, ST, MRI)) {
7520fca6ea1SDimitry Andric         MI.setDesc(TII.get(Opc));
7530fca6ea1SDimitry Andric         MadeChange = true;
7540fca6ea1SDimitry Andric       }
7550fca6ea1SDimitry Andric     }
7560fca6ea1SDimitry Andric   }
7570fca6ea1SDimitry Andric 
7580fca6ea1SDimitry Andric   return MadeChange;
7590fca6ea1SDimitry Andric }
7600fca6ea1SDimitry Andric 
7610fca6ea1SDimitry Andric bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF,
7620fca6ea1SDimitry Andric                                              const LoongArchInstrInfo &TII,
7630fca6ea1SDimitry Andric                                              const LoongArchSubtarget &ST,
7640fca6ea1SDimitry Andric                                              MachineRegisterInfo &MRI) {
7650fca6ea1SDimitry Andric   bool MadeChange = false;
7660fca6ea1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
7670fca6ea1SDimitry Andric     for (MachineInstr &MI : MBB) {
7680fca6ea1SDimitry Andric       unsigned WOpc;
7690fca6ea1SDimitry Andric       // TODO: Add more?
7700fca6ea1SDimitry Andric       switch (MI.getOpcode()) {
7710fca6ea1SDimitry Andric       default:
7720fca6ea1SDimitry Andric         continue;
7730fca6ea1SDimitry Andric       case LoongArch::ADD_D:
7740fca6ea1SDimitry Andric         WOpc = LoongArch::ADD_W;
7750fca6ea1SDimitry Andric         break;
7760fca6ea1SDimitry Andric       case LoongArch::ADDI_D:
7770fca6ea1SDimitry Andric         WOpc = LoongArch::ADDI_W;
7780fca6ea1SDimitry Andric         break;
7790fca6ea1SDimitry Andric       case LoongArch::SUB_D:
7800fca6ea1SDimitry Andric         WOpc = LoongArch::SUB_W;
7810fca6ea1SDimitry Andric         break;
7820fca6ea1SDimitry Andric       case LoongArch::MUL_D:
7830fca6ea1SDimitry Andric         WOpc = LoongArch::MUL_W;
7840fca6ea1SDimitry Andric         break;
7850fca6ea1SDimitry Andric       case LoongArch::SLLI_D:
7860fca6ea1SDimitry Andric         // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits
7870fca6ea1SDimitry Andric         if (MI.getOperand(2).getImm() >= 32)
7880fca6ea1SDimitry Andric           continue;
7890fca6ea1SDimitry Andric         WOpc = LoongArch::SLLI_W;
7900fca6ea1SDimitry Andric         break;
7910fca6ea1SDimitry Andric       case LoongArch::LD_D:
7920fca6ea1SDimitry Andric       case LoongArch::LD_WU:
7930fca6ea1SDimitry Andric         WOpc = LoongArch::LD_W;
7940fca6ea1SDimitry Andric         break;
7950fca6ea1SDimitry Andric       }
7960fca6ea1SDimitry Andric 
7970fca6ea1SDimitry Andric       if (hasAllWUsers(MI, ST, MRI)) {
7980fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "Replacing " << MI);
7990fca6ea1SDimitry Andric         MI.setDesc(TII.get(WOpc));
8000fca6ea1SDimitry Andric         MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
8010fca6ea1SDimitry Andric         MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
8020fca6ea1SDimitry Andric         MI.clearFlag(MachineInstr::MIFlag::IsExact);
8030fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "     with " << MI);
8040fca6ea1SDimitry Andric         ++NumTransformedToWInstrs;
8050fca6ea1SDimitry Andric         MadeChange = true;
8060fca6ea1SDimitry Andric       }
8070fca6ea1SDimitry Andric     }
8080fca6ea1SDimitry Andric   }
8090fca6ea1SDimitry Andric 
8100fca6ea1SDimitry Andric   return MadeChange;
8110fca6ea1SDimitry Andric }
8120fca6ea1SDimitry Andric 
8130fca6ea1SDimitry Andric bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
8140fca6ea1SDimitry Andric   if (skipFunction(MF.getFunction()))
8150fca6ea1SDimitry Andric     return false;
8160fca6ea1SDimitry Andric 
8170fca6ea1SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
8180fca6ea1SDimitry Andric   const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>();
8190fca6ea1SDimitry Andric   const LoongArchInstrInfo &TII = *ST.getInstrInfo();
8200fca6ea1SDimitry Andric 
8210fca6ea1SDimitry Andric   if (!ST.is64Bit())
8220fca6ea1SDimitry Andric     return false;
8230fca6ea1SDimitry Andric 
8240fca6ea1SDimitry Andric   bool MadeChange = false;
8250fca6ea1SDimitry Andric   MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
8260fca6ea1SDimitry Andric 
8270fca6ea1SDimitry Andric   if (!(DisableCvtToDSuffix || ST.preferWInst()))
8280fca6ea1SDimitry Andric     MadeChange |= convertToDSuffixes(MF, TII, ST, MRI);
8290fca6ea1SDimitry Andric 
8300fca6ea1SDimitry Andric   if (ST.preferWInst())
8310fca6ea1SDimitry Andric     MadeChange |= convertToWSuffixes(MF, TII, ST, MRI);
8320fca6ea1SDimitry Andric 
8330fca6ea1SDimitry Andric   return MadeChange;
8340fca6ea1SDimitry Andric }
835