//===-- X86EncodingOptimization.cpp - X86 Encoding optimization -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the X86 encoding optimization.
//
//===----------------------------------------------------------------------===//

#include "X86EncodingOptimization.h"
#include "X86BaseInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

// Returns true if the register operands should be swapped, i.e. only the
// register at OpIdx2 is an x86-64 extended register.
static bool shouldExchange(const MCInst &MI, unsigned OpIdx1, unsigned OpIdx2) {
  return !X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx1).getReg()) &&
         X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx2).getReg());
}

bool X86::optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc) {
  unsigned OpIdx1, OpIdx2;
  unsigned NewOpc;
  unsigned Opcode = MI.getOpcode();
#define FROM_TO(FROM, TO, IDX1, IDX2)                                          \
  case X86::FROM:                                                              \
    NewOpc = X86::TO;                                                          \
    OpIdx1 = IDX1;                                                             \
    OpIdx2 = IDX2;                                                             \
    break;
#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 1)
  switch (MI.getOpcode()) {
  default: {
    // If the instruction is a commutable arithmetic instruction, we might be
    // able to commute the operands to get a 2-byte VEX prefix.
    uint64_t TSFlags = Desc.TSFlags;
    if (!Desc.isCommutable() || (TSFlags & X86II::EncodingMask) != X86II::VEX ||
        (TSFlags & X86II::OpMapMask) != X86II::TB ||
        (TSFlags & X86II::FormMask) != X86II::MRMSrcReg ||
        (TSFlags & X86II::REX_W) || !(TSFlags & X86II::VEX_4V) ||
        MI.getNumOperands() != 3)
      return false;
    // These two are not truly commutable.
    if (Opcode == X86::VMOVHLPSrr || Opcode == X86::VUNPCKHPDrr)
      return false;
    OpIdx1 = 1;
    OpIdx2 = 2;
    if (!shouldExchange(MI, OpIdx1, OpIdx2))
      return false;
    std::swap(MI.getOperand(OpIdx1), MI.getOperand(OpIdx2));
    return true;
  }
  // Commute operands to get a smaller encoding by using VEX.R instead of
  // VEX.B if one of the registers is extended, but the other isn't.
  FROM_TO(VMOVZPQILo2PQIrr, VMOVPQI2QIrr, 0, 1)
  TO_REV(VMOVAPDrr)
  TO_REV(VMOVAPDYrr)
  TO_REV(VMOVAPSrr)
  TO_REV(VMOVAPSYrr)
  TO_REV(VMOVDQArr)
  TO_REV(VMOVDQAYrr)
  TO_REV(VMOVDQUrr)
  TO_REV(VMOVDQUYrr)
  TO_REV(VMOVUPDrr)
  TO_REV(VMOVUPDYrr)
  TO_REV(VMOVUPSrr)
  TO_REV(VMOVUPSYrr)
#undef TO_REV
#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 2)
  TO_REV(VMOVSDrr)
  TO_REV(VMOVSSrr)
#undef TO_REV
#undef FROM_TO
  }
  if (!shouldExchange(MI, OpIdx1, OpIdx2))
    return false;
  MI.setOpcode(NewOpc);
  return true;
}

// NOTE: We may write this as an InstAlias if it's only used by AsmParser. See
// validateTargetOperandClass.
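// Replace a shift/rotate whose immediate count is 1 with the shorter by-1
// form, dropping the immediate operand.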
bool X86::optimizeShiftRotateWithImmediateOne(MCInst &MI) {
  unsigned NewOpc;
#define TO_IMM1(FROM)                                                          \
  case X86::FROM##i:                                                           \
    NewOpc = X86::FROM##1;                                                     \
    break;
  switch (MI.getOpcode()) {
  default:
    return false;
  TO_IMM1(RCR8r)
  TO_IMM1(RCR16r)
  TO_IMM1(RCR32r)
  TO_IMM1(RCR64r)
  TO_IMM1(RCL8r)
  TO_IMM1(RCL16r)
  TO_IMM1(RCL32r)
  TO_IMM1(RCL64r)
  TO_IMM1(ROR8r)
  TO_IMM1(ROR16r)
  TO_IMM1(ROR32r)
  TO_IMM1(ROR64r)
  TO_IMM1(ROL8r)
  TO_IMM1(ROL16r)
  TO_IMM1(ROL32r)
  TO_IMM1(ROL64r)
  TO_IMM1(SAR8r)
  TO_IMM1(SAR16r)
  TO_IMM1(SAR32r)
  TO_IMM1(SAR64r)
  TO_IMM1(SHR8r)
  TO_IMM1(SHR16r)
  TO_IMM1(SHR32r)
  TO_IMM1(SHR64r)
  TO_IMM1(SHL8r)
  TO_IMM1(SHL16r)
  TO_IMM1(SHL32r)
  TO_IMM1(SHL64r)
  TO_IMM1(RCR8m)
  TO_IMM1(RCR16m)
  TO_IMM1(RCR32m)
  TO_IMM1(RCR64m)
  TO_IMM1(RCL8m)
  TO_IMM1(RCL16m)
  TO_IMM1(RCL32m)
  TO_IMM1(RCL64m)
  TO_IMM1(ROR8m)
  TO_IMM1(ROR16m)
  TO_IMM1(ROR32m)
  TO_IMM1(ROR64m)
  TO_IMM1(ROL8m)
  TO_IMM1(ROL16m)
  TO_IMM1(ROL32m)
  TO_IMM1(ROL64m)
  TO_IMM1(SAR8m)
  TO_IMM1(SAR16m)
  TO_IMM1(SAR32m)
  TO_IMM1(SAR64m)
  TO_IMM1(SHR8m)
  TO_IMM1(SHR16m)
  TO_IMM1(SHR32m)
  TO_IMM1(SHR64m)
  TO_IMM1(SHL8m)
  TO_IMM1(SHL16m)
  TO_IMM1(SHL32m)
  TO_IMM1(SHL64m)
#undef TO_IMM1
  }
  MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
  if (!LastOp.isImm() || LastOp.getImm() != 1)
    return false;
  MI.setOpcode(NewOpc);
  MI.erase(&LastOp);
  return true;
}

// Replace a VPCMP whose immediate selects the EQ (0) or GT (6) predicate with
// the equivalent VPCMPEQ/VPCMPGT form, which does not need the immediate byte.
bool X86::optimizeVPCMPWithImmediateOneOrSix(MCInst &MI) {
  unsigned Opc1;
  unsigned Opc2;
#define FROM_TO(FROM, TO1, TO2)                                                \
  case X86::FROM:                                                              \
    Opc1 = X86::TO1;                                                           \
    Opc2 = X86::TO2;                                                           \
    break;
  switch (MI.getOpcode()) {
  default:
    return false;
  FROM_TO(VPCMPBZ128rmi, VPCMPEQBZ128rm, VPCMPGTBZ128rm)
  FROM_TO(VPCMPBZ128rmik, VPCMPEQBZ128rmk, VPCMPGTBZ128rmk)
  FROM_TO(VPCMPBZ128rri, VPCMPEQBZ128rr, VPCMPGTBZ128rr)
  FROM_TO(VPCMPBZ128rrik, VPCMPEQBZ128rrk, VPCMPGTBZ128rrk)
  FROM_TO(VPCMPBZ256rmi, VPCMPEQBZ256rm, VPCMPGTBZ256rm)
  FROM_TO(VPCMPBZ256rmik, VPCMPEQBZ256rmk, VPCMPGTBZ256rmk)
  FROM_TO(VPCMPBZ256rri, VPCMPEQBZ256rr, VPCMPGTBZ256rr)
  FROM_TO(VPCMPBZ256rrik, VPCMPEQBZ256rrk, VPCMPGTBZ256rrk)
  FROM_TO(VPCMPBZrmi, VPCMPEQBZrm, VPCMPGTBZrm)
  FROM_TO(VPCMPBZrmik, VPCMPEQBZrmk, VPCMPGTBZrmk)
  FROM_TO(VPCMPBZrri, VPCMPEQBZrr, VPCMPGTBZrr)
  FROM_TO(VPCMPBZrrik, VPCMPEQBZrrk, VPCMPGTBZrrk)
  FROM_TO(VPCMPDZ128rmi, VPCMPEQDZ128rm, VPCMPGTDZ128rm)
  FROM_TO(VPCMPDZ128rmib, VPCMPEQDZ128rmb, VPCMPGTDZ128rmb)
  FROM_TO(VPCMPDZ128rmibk, VPCMPEQDZ128rmbk, VPCMPGTDZ128rmbk)
  FROM_TO(VPCMPDZ128rmik, VPCMPEQDZ128rmk, VPCMPGTDZ128rmk)
  FROM_TO(VPCMPDZ128rri, VPCMPEQDZ128rr, VPCMPGTDZ128rr)
  FROM_TO(VPCMPDZ128rrik, VPCMPEQDZ128rrk, VPCMPGTDZ128rrk)
  FROM_TO(VPCMPDZ256rmi, VPCMPEQDZ256rm, VPCMPGTDZ256rm)
  FROM_TO(VPCMPDZ256rmib, VPCMPEQDZ256rmb, VPCMPGTDZ256rmb)
  FROM_TO(VPCMPDZ256rmibk, VPCMPEQDZ256rmbk, VPCMPGTDZ256rmbk)
  FROM_TO(VPCMPDZ256rmik, VPCMPEQDZ256rmk, VPCMPGTDZ256rmk)
  FROM_TO(VPCMPDZ256rri, VPCMPEQDZ256rr, VPCMPGTDZ256rr)
  FROM_TO(VPCMPDZ256rrik, VPCMPEQDZ256rrk, VPCMPGTDZ256rrk)
  FROM_TO(VPCMPDZrmi, VPCMPEQDZrm, VPCMPGTDZrm)
  FROM_TO(VPCMPDZrmib, VPCMPEQDZrmb, VPCMPGTDZrmb)
  FROM_TO(VPCMPDZrmibk, VPCMPEQDZrmbk, VPCMPGTDZrmbk)
  FROM_TO(VPCMPDZrmik, VPCMPEQDZrmk, VPCMPGTDZrmk)
  FROM_TO(VPCMPDZrri, VPCMPEQDZrr, VPCMPGTDZrr)
  FROM_TO(VPCMPDZrrik, VPCMPEQDZrrk, VPCMPGTDZrrk)
  FROM_TO(VPCMPQZ128rmi, VPCMPEQQZ128rm, VPCMPGTQZ128rm)
  FROM_TO(VPCMPQZ128rmib, VPCMPEQQZ128rmb, VPCMPGTQZ128rmb)
  FROM_TO(VPCMPQZ128rmibk, VPCMPEQQZ128rmbk, VPCMPGTQZ128rmbk)
  FROM_TO(VPCMPQZ128rmik, VPCMPEQQZ128rmk, VPCMPGTQZ128rmk)
  FROM_TO(VPCMPQZ128rri, VPCMPEQQZ128rr, VPCMPGTQZ128rr)
  FROM_TO(VPCMPQZ128rrik, VPCMPEQQZ128rrk, VPCMPGTQZ128rrk)
  FROM_TO(VPCMPQZ256rmi, VPCMPEQQZ256rm, VPCMPGTQZ256rm)
  FROM_TO(VPCMPQZ256rmib, VPCMPEQQZ256rmb, VPCMPGTQZ256rmb)
  FROM_TO(VPCMPQZ256rmibk, VPCMPEQQZ256rmbk, VPCMPGTQZ256rmbk)
  FROM_TO(VPCMPQZ256rmik, VPCMPEQQZ256rmk, VPCMPGTQZ256rmk)
  FROM_TO(VPCMPQZ256rri, VPCMPEQQZ256rr, VPCMPGTQZ256rr)
  FROM_TO(VPCMPQZ256rrik, VPCMPEQQZ256rrk, VPCMPGTQZ256rrk)
  FROM_TO(VPCMPQZrmi, VPCMPEQQZrm, VPCMPGTQZrm)
  FROM_TO(VPCMPQZrmib, VPCMPEQQZrmb, VPCMPGTQZrmb)
  FROM_TO(VPCMPQZrmibk, VPCMPEQQZrmbk, VPCMPGTQZrmbk)
  FROM_TO(VPCMPQZrmik, VPCMPEQQZrmk, VPCMPGTQZrmk)
  FROM_TO(VPCMPQZrri, VPCMPEQQZrr, VPCMPGTQZrr)
  FROM_TO(VPCMPQZrrik, VPCMPEQQZrrk, VPCMPGTQZrrk)
  FROM_TO(VPCMPWZ128rmi, VPCMPEQWZ128rm, VPCMPGTWZ128rm)
  FROM_TO(VPCMPWZ128rmik, VPCMPEQWZ128rmk, VPCMPGTWZ128rmk)
  FROM_TO(VPCMPWZ128rri, VPCMPEQWZ128rr, VPCMPGTWZ128rr)
  FROM_TO(VPCMPWZ128rrik, VPCMPEQWZ128rrk, VPCMPGTWZ128rrk)
  FROM_TO(VPCMPWZ256rmi, VPCMPEQWZ256rm, VPCMPGTWZ256rm)
  FROM_TO(VPCMPWZ256rmik, VPCMPEQWZ256rmk, VPCMPGTWZ256rmk)
  FROM_TO(VPCMPWZ256rri, VPCMPEQWZ256rr, VPCMPGTWZ256rr)
  FROM_TO(VPCMPWZ256rrik, VPCMPEQWZ256rrk, VPCMPGTWZ256rrk)
  FROM_TO(VPCMPWZrmi, VPCMPEQWZrm, VPCMPGTWZrm)
  FROM_TO(VPCMPWZrmik, VPCMPEQWZrmk, VPCMPGTWZrmk)
  FROM_TO(VPCMPWZrri, VPCMPEQWZrr, VPCMPGTWZrr)
  FROM_TO(VPCMPWZrrik, VPCMPEQWZrrk, VPCMPGTWZrrk)
#undef FROM_TO
  }
  MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
  int64_t Imm = LastOp.getImm();
  unsigned NewOpc;
  if (Imm == 0)
    NewOpc = Opc1;
  else if (Imm == 6)
    NewOpc = Opc2;
  else
    return false;
  MI.setOpcode(NewOpc);
  MI.erase(&LastOp);
  return true;
}

bool X86::optimizeMOVSX(MCInst &MI) {
  unsigned NewOpc;
#define FROM_TO(FROM, TO, R0, R1)                                              \
  case X86::FROM:                                                              \
    if (MI.getOperand(0).getReg() != X86::R0 ||                                \
        MI.getOperand(1).getReg() != X86::R1)                                  \
      return false;                                                            \
    NewOpc = X86::TO;                                                          \
    break;
  switch (MI.getOpcode()) {
  default:
    return false;
  FROM_TO(MOVSX16rr8, CBW, AX, AL)     // movsbw %al, %ax   --> cbtw
  FROM_TO(MOVSX32rr16, CWDE, EAX, AX)  // movswl %ax, %eax  --> cwtl
  FROM_TO(MOVSX64rr32, CDQE, RAX, EAX) // movslq %eax, %rax --> cltq
  }
  MI.clear();
  MI.setOpcode(NewOpc);
  return true;
}