xref: /llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp (revision d9610b4a56c532614545eef5995362e99b776535)
1 //===-- X86EncodingOptimization.cpp - X86 Encoding optimization -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation of the X86 encoding optimization
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "X86EncodingOptimization.h"
14 #include "X86BaseInfo.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrDesc.h"
17 
18 using namespace llvm;
19 
20 static bool shouldExchange(const MCInst &MI, unsigned OpIdx1, unsigned OpIdx2) {
21   return !X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx1).getReg()) &&
22          X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx2).getReg());
23 }
24 
// Try to shrink a 3-byte VEX prefix to the 2-byte form, either by switching a
// move to its _REV variant or by commuting the sources of a commutable
// arithmetic instruction, so the extended register is encoded via VEX.R
// instead of VEX.B. Returns true if MI was changed.
bool X86::optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc) {
  unsigned OpIdx1, OpIdx2;
  unsigned NewOpc;
  unsigned Opcode = MI.getOpcode();
#define FROM_TO(FROM, TO, IDX1, IDX2)                                          \
  case X86::FROM:                                                              \
    NewOpc = X86::TO;                                                          \
    OpIdx1 = IDX1;                                                             \
    OpIdx2 = IDX2;                                                             \
    break;
#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 1)
  switch (MI.getOpcode()) {
  default: {
    // If the instruction is a commutable arithmetic instruction we might be
    // able to commute the operands to get a 2 byte VEX prefix.
    uint64_t TSFlags = Desc.TSFlags;
    // Restrict to commutable reg-reg VEX instructions in the 0F (TB) map with
    // a VEX.vvvv operand and no REX.W, taking exactly dst + two sources.
    if (!Desc.isCommutable() || (TSFlags & X86II::EncodingMask) != X86II::VEX ||
        (TSFlags & X86II::OpMapMask) != X86II::TB ||
        (TSFlags & X86II::FormMask) != X86II::MRMSrcReg ||
        (TSFlags & X86II::REX_W) || !(TSFlags & X86II::VEX_4V) ||
        MI.getNumOperands() != 3)
      return false;
    // These two are not truly commutable.
    if (Opcode == X86::VMOVHLPSrr || Opcode == X86::VUNPCKHPDrr)
      return false;
    // Operand 0 is the destination; 1 and 2 are the commutable sources.
    OpIdx1 = 1;
    OpIdx2 = 2;
    if (!shouldExchange(MI, OpIdx1, OpIdx2))
      return false;
    std::swap(MI.getOperand(OpIdx1), MI.getOperand(OpIdx2));
    return true;
  }
    // Commute operands to get a smaller encoding by using VEX.R instead of
    // VEX.B if one of the registers is extended, but other isn't.
    FROM_TO(VMOVZPQILo2PQIrr, VMOVPQI2QIrr, 0, 1)
    TO_REV(VMOVAPDrr)
    TO_REV(VMOVAPDYrr)
    TO_REV(VMOVAPSrr)
    TO_REV(VMOVAPSYrr)
    TO_REV(VMOVDQArr)
    TO_REV(VMOVDQAYrr)
    TO_REV(VMOVDQUrr)
    TO_REV(VMOVDQUYrr)
    TO_REV(VMOVUPDrr)
    TO_REV(VMOVUPDYrr)
    TO_REV(VMOVUPSrr)
    TO_REV(VMOVUPSYrr)
#undef TO_REV
// VMOVSD/VMOVSS have three operands; the register to compare against the
// destination is operand 2 rather than 1.
#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 2)
    TO_REV(VMOVSDrr)
    TO_REV(VMOVSSrr)
#undef TO_REV
#undef FROM_TO
  }
  // Only rewrite when it actually helps: operand OpIdx2 is extended and
  // OpIdx1 is not.
  if (!shouldExchange(MI, OpIdx1, OpIdx2))
    return false;
  MI.setOpcode(NewOpc);
  return true;
}
84 
// NOTE: We may write this as an InstAlias if it's only used by AsmParser. See
// validateTargetOperandClass.
// Rewrite a shift/rotate whose immediate count is exactly 1 to the dedicated
// by-one opcode (FOO##i -> FOO##1), which drops the immediate byte from the
// encoding. The immediate operand is erased from MI. Returns true if MI was
// changed.
bool X86::optimizeShiftRotateWithImmediateOne(MCInst &MI) {
  unsigned NewOpc;
#define TO_IMM1(FROM)                                                          \
  case X86::FROM##i:                                                           \
    NewOpc = X86::FROM##1;                                                     \
    break;
  switch (MI.getOpcode()) {
  default:
    return false;
    TO_IMM1(RCR8r)
    TO_IMM1(RCR16r)
    TO_IMM1(RCR32r)
    TO_IMM1(RCR64r)
    TO_IMM1(RCL8r)
    TO_IMM1(RCL16r)
    TO_IMM1(RCL32r)
    TO_IMM1(RCL64r)
    TO_IMM1(ROR8r)
    TO_IMM1(ROR16r)
    TO_IMM1(ROR32r)
    TO_IMM1(ROR64r)
    TO_IMM1(ROL8r)
    TO_IMM1(ROL16r)
    TO_IMM1(ROL32r)
    TO_IMM1(ROL64r)
    TO_IMM1(SAR8r)
    TO_IMM1(SAR16r)
    TO_IMM1(SAR32r)
    TO_IMM1(SAR64r)
    TO_IMM1(SHR8r)
    TO_IMM1(SHR16r)
    TO_IMM1(SHR32r)
    TO_IMM1(SHR64r)
    TO_IMM1(SHL8r)
    TO_IMM1(SHL16r)
    TO_IMM1(SHL32r)
    TO_IMM1(SHL64r)
    TO_IMM1(RCR8m)
    TO_IMM1(RCR16m)
    TO_IMM1(RCR32m)
    TO_IMM1(RCR64m)
    TO_IMM1(RCL8m)
    TO_IMM1(RCL16m)
    TO_IMM1(RCL32m)
    TO_IMM1(RCL64m)
    TO_IMM1(ROR8m)
    TO_IMM1(ROR16m)
    TO_IMM1(ROR32m)
    TO_IMM1(ROR64m)
    TO_IMM1(ROL8m)
    TO_IMM1(ROL16m)
    TO_IMM1(ROL32m)
    TO_IMM1(ROL64m)
    TO_IMM1(SAR8m)
    TO_IMM1(SAR16m)
    TO_IMM1(SAR32m)
    TO_IMM1(SAR64m)
    TO_IMM1(SHR8m)
    TO_IMM1(SHR16m)
    TO_IMM1(SHR32m)
    TO_IMM1(SHR64m)
    TO_IMM1(SHL8m)
    TO_IMM1(SHL16m)
    TO_IMM1(SHL32m)
    TO_IMM1(SHL64m)
#undef TO_IMM1
  }
  // The count is the last operand; only rewrite when it is a plain immediate
  // equal to 1 (not, e.g., a symbolic expression).
  MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
  if (!LastOp.isImm() || LastOp.getImm() != 1)
    return false;
  MI.setOpcode(NewOpc);
  MI.erase(&LastOp);
  return true;
}
161 
162 bool X86::optimizeVPCMPWithImmediateOneOrSix(MCInst &MI) {
163   unsigned Opc1;
164   unsigned Opc2;
165 #define FROM_TO(FROM, TO1, TO2)                                                \
166   case X86::FROM:                                                              \
167     Opc1 = X86::TO1;                                                           \
168     Opc2 = X86::TO2;                                                           \
169     break;
170   switch (MI.getOpcode()) {
171   default:
172     return false;
173     FROM_TO(VPCMPBZ128rmi, VPCMPEQBZ128rm, VPCMPGTBZ128rm)
174     FROM_TO(VPCMPBZ128rmik, VPCMPEQBZ128rmk, VPCMPGTBZ128rmk)
175     FROM_TO(VPCMPBZ128rri, VPCMPEQBZ128rr, VPCMPGTBZ128rr)
176     FROM_TO(VPCMPBZ128rrik, VPCMPEQBZ128rrk, VPCMPGTBZ128rrk)
177     FROM_TO(VPCMPBZ256rmi, VPCMPEQBZ256rm, VPCMPGTBZ256rm)
178     FROM_TO(VPCMPBZ256rmik, VPCMPEQBZ256rmk, VPCMPGTBZ256rmk)
179     FROM_TO(VPCMPBZ256rri, VPCMPEQBZ256rr, VPCMPGTBZ256rr)
180     FROM_TO(VPCMPBZ256rrik, VPCMPEQBZ256rrk, VPCMPGTBZ256rrk)
181     FROM_TO(VPCMPBZrmi, VPCMPEQBZrm, VPCMPGTBZrm)
182     FROM_TO(VPCMPBZrmik, VPCMPEQBZrmk, VPCMPGTBZrmk)
183     FROM_TO(VPCMPBZrri, VPCMPEQBZrr, VPCMPGTBZrr)
184     FROM_TO(VPCMPBZrrik, VPCMPEQBZrrk, VPCMPGTBZrrk)
185     FROM_TO(VPCMPDZ128rmi, VPCMPEQDZ128rm, VPCMPGTDZ128rm)
186     FROM_TO(VPCMPDZ128rmib, VPCMPEQDZ128rmb, VPCMPGTDZ128rmb)
187     FROM_TO(VPCMPDZ128rmibk, VPCMPEQDZ128rmbk, VPCMPGTDZ128rmbk)
188     FROM_TO(VPCMPDZ128rmik, VPCMPEQDZ128rmk, VPCMPGTDZ128rmk)
189     FROM_TO(VPCMPDZ128rri, VPCMPEQDZ128rr, VPCMPGTDZ128rr)
190     FROM_TO(VPCMPDZ128rrik, VPCMPEQDZ128rrk, VPCMPGTDZ128rrk)
191     FROM_TO(VPCMPDZ256rmi, VPCMPEQDZ256rm, VPCMPGTDZ256rm)
192     FROM_TO(VPCMPDZ256rmib, VPCMPEQDZ256rmb, VPCMPGTDZ256rmb)
193     FROM_TO(VPCMPDZ256rmibk, VPCMPEQDZ256rmbk, VPCMPGTDZ256rmbk)
194     FROM_TO(VPCMPDZ256rmik, VPCMPEQDZ256rmk, VPCMPGTDZ256rmk)
195     FROM_TO(VPCMPDZ256rri, VPCMPEQDZ256rr, VPCMPGTDZ256rr)
196     FROM_TO(VPCMPDZ256rrik, VPCMPEQDZ256rrk, VPCMPGTDZ256rrk)
197     FROM_TO(VPCMPDZrmi, VPCMPEQDZrm, VPCMPGTDZrm)
198     FROM_TO(VPCMPDZrmib, VPCMPEQDZrmb, VPCMPGTDZrmb)
199     FROM_TO(VPCMPDZrmibk, VPCMPEQDZrmbk, VPCMPGTDZrmbk)
200     FROM_TO(VPCMPDZrmik, VPCMPEQDZrmk, VPCMPGTDZrmk)
201     FROM_TO(VPCMPDZrri, VPCMPEQDZrr, VPCMPGTDZrr)
202     FROM_TO(VPCMPDZrrik, VPCMPEQDZrrk, VPCMPGTDZrrk)
203     FROM_TO(VPCMPQZ128rmi, VPCMPEQQZ128rm, VPCMPGTQZ128rm)
204     FROM_TO(VPCMPQZ128rmib, VPCMPEQQZ128rmb, VPCMPGTQZ128rmb)
205     FROM_TO(VPCMPQZ128rmibk, VPCMPEQQZ128rmbk, VPCMPGTQZ128rmbk)
206     FROM_TO(VPCMPQZ128rmik, VPCMPEQQZ128rmk, VPCMPGTQZ128rmk)
207     FROM_TO(VPCMPQZ128rri, VPCMPEQQZ128rr, VPCMPGTQZ128rr)
208     FROM_TO(VPCMPQZ128rrik, VPCMPEQQZ128rrk, VPCMPGTQZ128rrk)
209     FROM_TO(VPCMPQZ256rmi, VPCMPEQQZ256rm, VPCMPGTQZ256rm)
210     FROM_TO(VPCMPQZ256rmib, VPCMPEQQZ256rmb, VPCMPGTQZ256rmb)
211     FROM_TO(VPCMPQZ256rmibk, VPCMPEQQZ256rmbk, VPCMPGTQZ256rmbk)
212     FROM_TO(VPCMPQZ256rmik, VPCMPEQQZ256rmk, VPCMPGTQZ256rmk)
213     FROM_TO(VPCMPQZ256rri, VPCMPEQQZ256rr, VPCMPGTQZ256rr)
214     FROM_TO(VPCMPQZ256rrik, VPCMPEQQZ256rrk, VPCMPGTQZ256rrk)
215     FROM_TO(VPCMPQZrmi, VPCMPEQQZrm, VPCMPGTQZrm)
216     FROM_TO(VPCMPQZrmib, VPCMPEQQZrmb, VPCMPGTQZrmb)
217     FROM_TO(VPCMPQZrmibk, VPCMPEQQZrmbk, VPCMPGTQZrmbk)
218     FROM_TO(VPCMPQZrmik, VPCMPEQQZrmk, VPCMPGTQZrmk)
219     FROM_TO(VPCMPQZrri, VPCMPEQQZrr, VPCMPGTQZrr)
220     FROM_TO(VPCMPQZrrik, VPCMPEQQZrrk, VPCMPGTQZrrk)
221     FROM_TO(VPCMPWZ128rmi, VPCMPEQWZ128rm, VPCMPGTWZ128rm)
222     FROM_TO(VPCMPWZ128rmik, VPCMPEQWZ128rmk, VPCMPGTWZ128rmk)
223     FROM_TO(VPCMPWZ128rri, VPCMPEQWZ128rr, VPCMPGTWZ128rr)
224     FROM_TO(VPCMPWZ128rrik, VPCMPEQWZ128rrk, VPCMPGTWZ128rrk)
225     FROM_TO(VPCMPWZ256rmi, VPCMPEQWZ256rm, VPCMPGTWZ256rm)
226     FROM_TO(VPCMPWZ256rmik, VPCMPEQWZ256rmk, VPCMPGTWZ256rmk)
227     FROM_TO(VPCMPWZ256rri, VPCMPEQWZ256rr, VPCMPGTWZ256rr)
228     FROM_TO(VPCMPWZ256rrik, VPCMPEQWZ256rrk, VPCMPGTWZ256rrk)
229     FROM_TO(VPCMPWZrmi, VPCMPEQWZrm, VPCMPGTWZrm)
230     FROM_TO(VPCMPWZrmik, VPCMPEQWZrmk, VPCMPGTWZrmk)
231     FROM_TO(VPCMPWZrri, VPCMPEQWZrr, VPCMPGTWZrr)
232     FROM_TO(VPCMPWZrrik, VPCMPEQWZrrk, VPCMPGTWZrrk)
233 #undef FROM_TO
234   }
235   MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
236   int64_t Imm = LastOp.getImm();
237   unsigned NewOpc;
238   if (Imm == 0)
239     NewOpc = Opc1;
240   else if(Imm == 6)
241     NewOpc = Opc2;
242   else
243     return false;
244   MI.setOpcode(NewOpc);
245   MI.erase(&LastOp);
246   return true;
247 }
248 
249 bool X86::optimizeMOVSX(MCInst &MI) {
250   unsigned NewOpc;
251 #define FROM_TO(FROM, TO, R0, R1)                                              \
252   case X86::FROM:                                                              \
253     if (MI.getOperand(0).getReg() != X86::R0 ||                                \
254         MI.getOperand(1).getReg() != X86::R1)                                  \
255       return false;                                                            \
256     NewOpc = X86::TO;                                                          \
257     break;
258   switch (MI.getOpcode()) {
259   default:
260     return false;
261     FROM_TO(MOVSX16rr8, CBW, AX, AL)     // movsbw %al, %ax   --> cbtw
262     FROM_TO(MOVSX32rr16, CWDE, EAX, AX)  // movswl %ax, %eax  --> cwtl
263     FROM_TO(MOVSX64rr32, CDQE, RAX, EAX) // movslq %eax, %rax --> cltq
264   }
265   MI.clear();
266   MI.setOpcode(NewOpc);
267   return true;
268 }
269