//===- X86CompressEVEX.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
// when possible in order to reduce code size or facilitate HW decoding.
//
// Possible compression:
//   a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
//   b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy)
//   c. NDD (EVEX) -> non-NDD (legacy)
//   d. NF_ND (EVEX) -> NF (EVEX)
//
// Compression a, b and c can always reduce code size, with some exceptions
// such as promoted 16-bit CRC32 which is as long as the legacy version.
//
// legacy:
//   crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
// promoted:
//   crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
//
// From a performance perspective, these should be the same (same uops and same
// EXE ports). From an FMV perspective, an older legacy encoding is preferred
// b/c it can execute in more places (broader HW install base). So we will
// still do the compression.
//
// Compression d can help hardware decode (HW may skip reading the NDD
// register) although the instruction length remains unchanged.
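//
// For example, compression a typically replaces the 4-byte EVEX prefix with a
// 2- or 3-byte VEX prefix when no ZMM or extended registers are involved
// (encoding bytes below are illustrative):
//
//   vaddps %xmm1, %xmm2, %xmm3 ## EVEX encoding: [0x62,0xf1,0x6c,0x08,0x58,0xd9]
//   vaddps %xmm1, %xmm2, %xmm3 ## VEX encoding:  [0xc5,0xe8,0x58,0xd9]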
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86InstComments.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include <atomic>
#include <cassert>
#include <cstdint>

using namespace llvm;

// Including the generated EVEX compression tables.
struct X86CompressEVEXTableEntry {
  uint16_t OldOpc;
  uint16_t NewOpc;

  bool operator<(const X86CompressEVEXTableEntry &RHS) const {
    return OldOpc < RHS.OldOpc;
  }

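  // Allow llvm::lower_bound to compare a table entry directly against an
  // opcode when searching the sorted table.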
  friend bool operator<(const X86CompressEVEXTableEntry &TE, unsigned Opc) {
    return TE.OldOpc < Opc;
  }
};
#include "X86GenCompressEVEXTables.inc"

#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
#define COMP_EVEX_NAME "x86-compress-evex"

#define DEBUG_TYPE COMP_EVEX_NAME

namespace {

class CompressEVEXPass : public MachineFunctionPass {
public:
  static char ID;
  CompressEVEXPass() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return COMP_EVEX_DESC; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
};

} // end anonymous namespace

char CompressEVEXPass::ID = 0;

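// Return true if MI references a register that can only be encoded with an
// EVEX prefix (XMM16-31, YMM16-31 or an APX extended GPR), which prevents
// compression to a VEX or legacy encoding.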
static bool usesExtendedRegister(const MachineInstr &MI) {
  auto isHiRegIdx = [](unsigned Reg) {
    // Check for XMM register with indexes between 16 - 31.
    if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
      return true;
    // Check for YMM register with indexes between 16 - 31.
    if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
      return true;
    // Check for GPR with indexes between 16 - 31.
    if (X86II::isApxExtendedReg(Reg))
      return true;
    return false;
  };

  // Check that operands are not ZMM regs or
  // XMM/YMM regs with hi indexes between 16 - 31.
  for (const MachineOperand &MO : MI.explicit_operands()) {
    if (!MO.isReg())
      continue;

    Register Reg = MO.getReg();
    assert(!X86II::isZMMReg(Reg) &&
           "ZMM instructions should not be in the EVEX->VEX tables");
    if (isHiRegIdx(Reg))
      return true;
  }

  return false;
}

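// The VEX forms of a few instructions are provided by separate ISA extensions
// (AVX-NE-CONVERT, AVX-VNNI, AVX-IFMA) rather than by the AVX512 feature that
// provides the EVEX form, so check the subtarget before compressing them.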
static bool checkVEXInstPredicate(unsigned OldOpc, const X86Subtarget &ST) {
  switch (OldOpc) {
  default:
    return true;
  case X86::VCVTNEPS2BF16Z128rm:
  case X86::VCVTNEPS2BF16Z128rr:
  case X86::VCVTNEPS2BF16Z256rm:
  case X86::VCVTNEPS2BF16Z256rr:
    return ST.hasAVXNECONVERT();
  case X86::VPDPBUSDSZ128m:
  case X86::VPDPBUSDSZ128r:
  case X86::VPDPBUSDSZ256m:
  case X86::VPDPBUSDSZ256r:
  case X86::VPDPBUSDZ128m:
  case X86::VPDPBUSDZ128r:
  case X86::VPDPBUSDZ256m:
  case X86::VPDPBUSDZ256r:
  case X86::VPDPWSSDSZ128m:
  case X86::VPDPWSSDSZ128r:
  case X86::VPDPWSSDSZ256m:
  case X86::VPDPWSSDSZ256r:
  case X86::VPDPWSSDZ128m:
  case X86::VPDPWSSDZ128r:
  case X86::VPDPWSSDZ256m:
  case X86::VPDPWSSDZ256r:
    return ST.hasAVXVNNI();
  case X86::VPMADD52HUQZ128m:
  case X86::VPMADD52HUQZ128r:
  case X86::VPMADD52HUQZ256m:
  case X86::VPMADD52HUQZ256r:
  case X86::VPMADD52LUQZ128m:
  case X86::VPMADD52LUQZ128r:
  case X86::VPMADD52LUQZ256m:
  case X86::VPMADD52LUQZ256r:
    return ST.hasAVXIFMA();
  }
}

// Do any custom cleanup needed to finalize the conversion.
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
  (void)NewOpc;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case X86::VALIGNDZ128rri:
  case X86::VALIGNDZ128rmi:
  case X86::VALIGNQZ128rri:
  case X86::VALIGNQZ128rmi: {
    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
           "Unexpected new opcode!");
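    // VALIGND/VALIGNQ take the shift amount in dword/qword elements while
    // VPALIGNR takes it in bytes, so scale the immediate accordingly.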
    unsigned Scale =
        (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    Imm.setImm(Imm.getImm() * Scale);
    break;
  }
  case X86::VSHUFF32X4Z256rmi:
  case X86::VSHUFF32X4Z256rri:
  case X86::VSHUFF64X2Z256rmi:
  case X86::VSHUFF64X2Z256rri:
  case X86::VSHUFI32X4Z256rmi:
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI64X2Z256rmi:
  case X86::VSHUFI64X2Z256rri: {
    assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
            NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
           "Unexpected new opcode!");
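    // A 256-bit VSHUFF/VSHUFI with 128-bit granularity selects one 128-bit
    // lane from each source, which VPERM2F128/VPERM2I128 expresses with a
    // different immediate layout.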
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Set bit 5, move bit 1 to bit 4, copy bit 0.
    Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
    break;
  }
  case X86::VRNDSCALEPDZ128rri:
  case X86::VRNDSCALEPDZ128rmi:
  case X86::VRNDSCALEPSZ128rri:
  case X86::VRNDSCALEPSZ128rmi:
  case X86::VRNDSCALEPDZ256rri:
  case X86::VRNDSCALEPDZ256rmi:
  case X86::VRNDSCALEPSZ256rri:
  case X86::VRNDSCALEPSZ256rmi:
  case X86::VRNDSCALESDZr:
  case X86::VRNDSCALESDZm:
  case X86::VRNDSCALESSZr:
  case X86::VRNDSCALESSZm:
  case X86::VRNDSCALESDZr_Int:
  case X86::VRNDSCALESDZm_Int:
  case X86::VRNDSCALESSZr_Int:
  case X86::VRNDSCALESSZm_Int:
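    // VRNDSCALE uses immediate bits 7:4 to select the scale (number of
    // fraction bits to keep); VROUND has no such field, so only immediates
    // with those bits clear can be compressed.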
    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Ensure that only bits 3:0 of the immediate are used.
    if ((ImmVal & 0xf) != ImmVal)
      return false;
    break;
  }

  return true;
}

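// Try to replace MI's EVEX opcode with an equivalent VEX or legacy one from
// the compression table. Return true if the instruction was rewritten.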
static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
  const MCInstrDesc &Desc = MI.getDesc();

  // Check for EVEX instructions only.
  if ((Desc.TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return false;

  // Check for EVEX instructions with mask or broadcast: in these cases the
  // EVEX prefix is needed to carry this information, which prevents the
  // transformation to a VEX encoding.
  if (Desc.TSFlags & (X86II::EVEX_K | X86II::EVEX_B))
    return false;

  // Check for EVEX instructions with L2 set. These instructions are 512-bit
  // and can't be converted to VEX.
  if (Desc.TSFlags & X86II::EVEX_L2)
    return false;

  ArrayRef<X86CompressEVEXTableEntry> Table = ArrayRef(X86CompressEVEXTable);

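  // The table is sorted by the EVEX opcode, so a binary search finds the
  // compressed opcode, if any.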
  unsigned Opc = MI.getOpcode();
  const auto *I = llvm::lower_bound(Table, Opc);
  if (I == Table.end() || I->OldOpc != Opc)
    return false;

  if (usesExtendedRegister(MI))
    return false;
  if (!checkVEXInstPredicate(Opc, ST))
    return false;
  if (!performCustomAdjustments(MI, I->NewOpc))
    return false;

  MI.setDesc(ST.getInstrInfo()->get(I->NewOpc));
  MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
  return true;
}

bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
#ifndef NDEBUG
  // Make sure the tables are sorted.
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(X86CompressEVEXTable) &&
           "X86CompressEVEXTable is not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
#endif
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (!ST.hasAVX512())
    return false;

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    // Traverse the basic block.
    for (MachineInstr &MI : MBB)
      Changed |= CompressEVEXImpl(MI, ST);
  }

  return Changed;
}

INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false)

FunctionPass *llvm::createX86CompressEVEXPass() {
  return new CompressEVEXPass();
}