//===- X86CompressEVEX.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
// when possible in order to reduce code size or facilitate HW decoding.
//
// Possible compression:
//   a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
//   b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy/VEX)
//   c. NDD (EVEX) -> non-NDD (legacy)
//   d. NF_ND (EVEX) -> NF (EVEX)
//   e. NonNF (EVEX) -> NF (EVEX)
//
// Compressions a, b and c reduce code size in nearly all cases, with a few
// exceptions such as the promoted 16-bit CRC32, which is as long as the
// legacy version.
//
// legacy:
//   crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
// promoted:
//   crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
//
// From a performance perspective, the two encodings should behave the same
// (same uops, same execution ports). From an FMV perspective, the older legacy
// encoding is preferred because it can execute on more hardware (broader HW
// install base), so we still perform the compression.
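//
// Compression c applies when the new data destination is redundant, i.e. equal
// to one of the sources (possibly after commuting them). For example
// (illustrative AT&T syntax, registers chosen arbitrarily):
//
// NDD:
//   addq %rax, %rbx, %rbx ## 4-byte EVEX prefix
// non-NDD:
//   addq %rax, %rbx       ## 1-byte REX.W prefix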
//
// Compression d can help hardware decode (HW may skip reading the NDD
// register) although the instruction length remains unchanged.
//
// Compression e can help hardware skip updating EFLAGS although the instruction
// length remains unchanged.
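//
// For example (illustrative AT&T syntax), when EFLAGS is dead an ND add can be
// recoded with the NF (no flags) hint:
//
// NonNF:
//   addq %rax, %rbx, %rcx      ## updates EFLAGS
// NF:
//   {nf} addq %rax, %rbx, %rcx ## leaves EFLAGS untouched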
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include <atomic>
#include <cassert>
#include <cstdint>

using namespace llvm;

#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
#define COMP_EVEX_NAME "x86-compress-evex"

#define DEBUG_TYPE COMP_EVEX_NAME

namespace {
// Including the generated EVEX compression tables.
#define GET_X86_COMPRESS_EVEX_TABLE
#include "X86GenInstrMapping.inc"

class CompressEVEXPass : public MachineFunctionPass {
public:
  static char ID;
  CompressEVEXPass() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return COMP_EVEX_DESC; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
};

} // end anonymous namespace

char CompressEVEXPass::ID = 0;

static bool usesExtendedRegister(const MachineInstr &MI) {
  auto isHiRegIdx = [](unsigned Reg) {
    // Check for an XMM register with index between 16 and 31.
    if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
      return true;
    // Check for a YMM register with index between 16 and 31.
    if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
      return true;
    // Check for a GPR with index between 16 and 31.
    if (X86II::isApxExtendedReg(Reg))
      return true;
    return false;
  };

  // Check that operands are not ZMM regs or
  // XMM/YMM regs with high indexes between 16 and 31.
  for (const MachineOperand &MO : MI.explicit_operands()) {
    if (!MO.isReg())
      continue;

    Register Reg = MO.getReg();
    assert(!X86II::isZMMReg(Reg) &&
           "ZMM instructions should not be in the EVEX->VEX tables");
    if (isHiRegIdx(Reg))
      return true;
  }

  return false;
}

// Do any custom cleanup needed to finalize the conversion.
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
  (void)NewOpc;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case X86::VALIGNDZ128rri:
  case X86::VALIGNDZ128rmi:
  case X86::VALIGNQZ128rri:
  case X86::VALIGNQZ128rmi: {
    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
           "Unexpected new opcode!");
    unsigned Scale =
        (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    Imm.setImm(Imm.getImm() * Scale);
    break;
  }
  case X86::VSHUFF32X4Z256rmi:
  case X86::VSHUFF32X4Z256rri:
  case X86::VSHUFF64X2Z256rmi:
  case X86::VSHUFF64X2Z256rri:
  case X86::VSHUFI32X4Z256rmi:
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI64X2Z256rmi:
  case X86::VSHUFI64X2Z256rri: {
    assert((NewOpc == X86::VPERM2F128rri || NewOpc == X86::VPERM2I128rri ||
            NewOpc == X86::VPERM2F128rmi || NewOpc == X86::VPERM2I128rmi) &&
           "Unexpected new opcode!");
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Set bit 5, move bit 1 to bit 4, copy bit 0.
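    // The VSHUF*Z256 immediate selects the low result lane from src1 (bit 0)
    // and the high result lane from src2 (bit 1); VPERM2[FI]128 uses imm bits
    // 1:0 and 5:4 as two-bit source-lane selectors, so the compressed imm keeps
    // bit 0, moves the old bit 1 into bit 4, and sets bit 5 to pick src2.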
    Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
    break;
  }
  case X86::VRNDSCALEPDZ128rri:
  case X86::VRNDSCALEPDZ128rmi:
  case X86::VRNDSCALEPSZ128rri:
  case X86::VRNDSCALEPSZ128rmi:
  case X86::VRNDSCALEPDZ256rri:
  case X86::VRNDSCALEPDZ256rmi:
  case X86::VRNDSCALEPSZ256rri:
  case X86::VRNDSCALEPSZ256rmi:
  case X86::VRNDSCALESDZrri:
  case X86::VRNDSCALESDZrmi:
  case X86::VRNDSCALESSZrri:
  case X86::VRNDSCALESSZrmi:
  case X86::VRNDSCALESDZrri_Int:
  case X86::VRNDSCALESDZrmi_Int:
  case X86::VRNDSCALESSZrri_Int:
  case X86::VRNDSCALESSZrmi_Int:
    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Ensure that only bits 3:0 of the immediate are used.
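    // VRNDSCALE's imm bits 7:4 specify how many fraction bits to keep after
    // rounding; the VEX VROUND* replacement has no such field, so compression
    // is only valid when those bits are zero.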
    if ((ImmVal & 0xf) != ImmVal)
      return false;
    break;
  }

  return true;
}

static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  // Check for EVEX instructions only.
  if ((TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return false;

  // Instructions with mask or 512-bit vector can't be converted to VEX.
  if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2))
    return false;

  auto IsRedundantNewDataDest = [&](unsigned &Opc) {
    // $rbx = ADD64rr_ND $rbx, $rax / $rbx = ADD64rr_ND $rax, $rbx
    //   ->
    // $rbx = ADD64rr $rbx, $rax
    const MCInstrDesc &Desc = MI.getDesc();
    Register Reg0 = MI.getOperand(0).getReg();
    const MachineOperand &Op1 = MI.getOperand(1);
    if (!Op1.isReg() || X86::getFirstAddrOperandIdx(MI) == 1 ||
        X86::isCFCMOVCC(MI.getOpcode()))
      return false;
    Register Reg1 = Op1.getReg();
    if (Reg1 == Reg0)
      return true;

    // Op1 and Op2 may be commutable for ND instructions.
    if (!Desc.isCommutable() || Desc.getNumOperands() < 3 ||
        !MI.getOperand(2).isReg() || MI.getOperand(2).getReg() != Reg0)
      return false;
    // Opcode may change after commute, e.g. SHRD -> SHLD
    ST.getInstrInfo()->commuteInstruction(MI, false, 1, 2);
    Opc = MI.getOpcode();
    return true;
  };

  // EVEX_B has several meanings.
  // AVX512:
  //  register form: rounding control or SAE
  //  memory form: broadcast
  //
  // APX:
  //  MAP4: NDD
  //
  // In the AVX512 cases, the EVEX prefix is needed to carry this information,
  // which prevents the transformation to a VEX encoding.
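  // In the APX MAP4 case, EVEX_B merely marks the new data destination, which
  // is handled separately below, so it does not block compression by itself.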
  bool IsND = X86II::hasNewDataDest(TSFlags);
  if (TSFlags & X86II::EVEX_B && !IsND)
    return false;
  unsigned Opc = MI.getOpcode();
  // MOVBE*rr is special because it has NDD semantics but does not set EVEX_B.
  bool IsNDLike = IsND || Opc == X86::MOVBE32rr || Opc == X86::MOVBE64rr;
  bool IsRedundantNDD = IsNDLike ? IsRedundantNewDataDest(Opc) : false;

  auto GetCompressedOpc = [&](unsigned Opc) -> unsigned {
    ArrayRef<X86TableEntry> Table = ArrayRef(X86CompressEVEXTable);
    const auto I = llvm::lower_bound(Table, Opc);
    if (I == Table.end() || I->OldOpc != Opc)
      return 0;

    if (usesExtendedRegister(MI) || !checkPredicate(I->NewOpc, &ST) ||
        !performCustomAdjustments(MI, I->NewOpc))
      return 0;
    return I->NewOpc;
  };
  // Prefer dropping a redundant NDD; otherwise convert NonNF -> NF when the
  // instruction is ND-like and EFLAGS is dead; otherwise consult the
  // compression table.
  unsigned NewOpc = IsRedundantNDD
                        ? X86::getNonNDVariant(Opc)
                        : ((IsNDLike && ST.hasNF() &&
                            MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr))
                               ? X86::getNFVariant(Opc)
                               : GetCompressedOpc(Opc));

  if (!NewOpc)
    return false;

  const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(NewOpc);
  MI.setDesc(NewDesc);
  unsigned AsmComment;
  switch (NewDesc.TSFlags & X86II::EncodingMask) {
  case X86II::LEGACY:
    AsmComment = X86::AC_EVEX_2_LEGACY;
    break;
  case X86II::VEX:
    AsmComment = X86::AC_EVEX_2_VEX;
    break;
  case X86II::EVEX:
    AsmComment = X86::AC_EVEX_2_EVEX;
    assert(IsND && (NewDesc.TSFlags & X86II::EVEX_NF) &&
           "Unknown EVEX2EVEX compression");
    break;
  default:
    llvm_unreachable("Unknown EVEX compression");
  }
  MI.setAsmPrinterFlag(AsmComment);
  if (IsRedundantNDD)
    MI.tieOperands(0, 1);

  return true;
}

bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
#ifndef NDEBUG
  // Make sure the tables are sorted.
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(X86CompressEVEXTable) &&
           "X86CompressEVEXTable is not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
#endif
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD())
    return false;

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    // Traverse the basic block.
    for (MachineInstr &MI : MBB)
      Changed |= CompressEVEXImpl(MI, ST);
  }

  return Changed;
}

INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false)

FunctionPass *llvm::createX86CompressEVEXPass() {
  return new CompressEVEXPass();
}